sierra-research 1.3.11__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254)
  1. sierra/__init__.py +3 -3
  2. sierra/core/__init__.py +3 -3
  3. sierra/core/batchroot.py +223 -0
  4. sierra/core/cmdline.py +681 -1057
  5. sierra/core/compare.py +11 -0
  6. sierra/core/config.py +96 -88
  7. sierra/core/engine.py +306 -0
  8. sierra/core/execenv.py +380 -0
  9. sierra/core/expdef.py +11 -0
  10. sierra/core/experiment/__init__.py +1 -0
  11. sierra/core/experiment/bindings.py +150 -101
  12. sierra/core/experiment/definition.py +414 -245
  13. sierra/core/experiment/spec.py +83 -85
  14. sierra/core/exproot.py +44 -0
  15. sierra/core/generators/__init__.py +10 -0
  16. sierra/core/generators/experiment.py +528 -0
  17. sierra/core/generators/generator_factory.py +138 -137
  18. sierra/core/graphs/__init__.py +23 -0
  19. sierra/core/graphs/bcbridge.py +94 -0
  20. sierra/core/graphs/heatmap.py +245 -324
  21. sierra/core/graphs/pathset.py +27 -0
  22. sierra/core/graphs/schema.py +77 -0
  23. sierra/core/graphs/stacked_line.py +341 -0
  24. sierra/core/graphs/summary_line.py +506 -0
  25. sierra/core/logging.py +3 -2
  26. sierra/core/models/__init__.py +3 -1
  27. sierra/core/models/info.py +19 -0
  28. sierra/core/models/interface.py +52 -122
  29. sierra/core/pipeline/__init__.py +2 -5
  30. sierra/core/pipeline/pipeline.py +228 -126
  31. sierra/core/pipeline/stage1/__init__.py +10 -0
  32. sierra/core/pipeline/stage1/pipeline_stage1.py +45 -31
  33. sierra/core/pipeline/stage2/__init__.py +10 -0
  34. sierra/core/pipeline/stage2/pipeline_stage2.py +8 -11
  35. sierra/core/pipeline/stage2/runner.py +401 -0
  36. sierra/core/pipeline/stage3/__init__.py +12 -0
  37. sierra/core/pipeline/stage3/gather.py +321 -0
  38. sierra/core/pipeline/stage3/pipeline_stage3.py +37 -84
  39. sierra/core/pipeline/stage4/__init__.py +12 -2
  40. sierra/core/pipeline/stage4/pipeline_stage4.py +36 -354
  41. sierra/core/pipeline/stage5/__init__.py +12 -0
  42. sierra/core/pipeline/stage5/pipeline_stage5.py +33 -208
  43. sierra/core/pipeline/yaml.py +48 -0
  44. sierra/core/plugin.py +529 -62
  45. sierra/core/proc.py +11 -0
  46. sierra/core/prod.py +11 -0
  47. sierra/core/ros1/__init__.py +5 -1
  48. sierra/core/ros1/callbacks.py +22 -21
  49. sierra/core/ros1/cmdline.py +59 -88
  50. sierra/core/ros1/generators.py +159 -175
  51. sierra/core/ros1/variables/__init__.py +3 -0
  52. sierra/core/ros1/variables/exp_setup.py +122 -116
  53. sierra/core/startup.py +106 -76
  54. sierra/core/stat_kernels.py +4 -5
  55. sierra/core/storage.py +13 -32
  56. sierra/core/trampoline.py +30 -0
  57. sierra/core/types.py +116 -71
  58. sierra/core/utils.py +103 -106
  59. sierra/core/variables/__init__.py +1 -1
  60. sierra/core/variables/base_variable.py +12 -17
  61. sierra/core/variables/batch_criteria.py +387 -481
  62. sierra/core/variables/builtin.py +135 -0
  63. sierra/core/variables/exp_setup.py +19 -39
  64. sierra/core/variables/population_size.py +72 -76
  65. sierra/core/variables/variable_density.py +44 -68
  66. sierra/core/vector.py +1 -1
  67. sierra/main.py +256 -88
  68. sierra/plugins/__init__.py +119 -0
  69. sierra/plugins/compare/__init__.py +14 -0
  70. sierra/plugins/compare/graphs/__init__.py +19 -0
  71. sierra/plugins/compare/graphs/cmdline.py +120 -0
  72. sierra/plugins/compare/graphs/comparator.py +291 -0
  73. sierra/plugins/compare/graphs/inter_controller.py +531 -0
  74. sierra/plugins/compare/graphs/inter_scenario.py +297 -0
  75. sierra/plugins/compare/graphs/namecalc.py +53 -0
  76. sierra/plugins/compare/graphs/outputroot.py +73 -0
  77. sierra/plugins/compare/graphs/plugin.py +147 -0
  78. sierra/plugins/compare/graphs/preprocess.py +172 -0
  79. sierra/plugins/compare/graphs/schema.py +37 -0
  80. sierra/plugins/engine/__init__.py +14 -0
  81. sierra/plugins/engine/argos/__init__.py +18 -0
  82. sierra/plugins/{platform → engine}/argos/cmdline.py +144 -151
  83. sierra/plugins/{platform/argos/variables → engine/argos/generators}/__init__.py +5 -0
  84. sierra/plugins/engine/argos/generators/engine.py +394 -0
  85. sierra/plugins/engine/argos/plugin.py +393 -0
  86. sierra/plugins/{platform/argos/generators → engine/argos/variables}/__init__.py +5 -0
  87. sierra/plugins/engine/argos/variables/arena_shape.py +183 -0
  88. sierra/plugins/engine/argos/variables/cameras.py +240 -0
  89. sierra/plugins/engine/argos/variables/constant_density.py +112 -0
  90. sierra/plugins/engine/argos/variables/exp_setup.py +82 -0
  91. sierra/plugins/{platform → engine}/argos/variables/physics_engines.py +83 -87
  92. sierra/plugins/engine/argos/variables/population_constant_density.py +178 -0
  93. sierra/plugins/engine/argos/variables/population_size.py +115 -0
  94. sierra/plugins/engine/argos/variables/population_variable_density.py +123 -0
  95. sierra/plugins/engine/argos/variables/rendering.py +108 -0
  96. sierra/plugins/engine/ros1gazebo/__init__.py +18 -0
  97. sierra/plugins/engine/ros1gazebo/cmdline.py +175 -0
  98. sierra/plugins/{platform/ros1robot → engine/ros1gazebo}/generators/__init__.py +5 -0
  99. sierra/plugins/engine/ros1gazebo/generators/engine.py +125 -0
  100. sierra/plugins/engine/ros1gazebo/plugin.py +404 -0
  101. sierra/plugins/engine/ros1gazebo/variables/__init__.py +15 -0
  102. sierra/plugins/engine/ros1gazebo/variables/population_size.py +214 -0
  103. sierra/plugins/engine/ros1robot/__init__.py +18 -0
  104. sierra/plugins/engine/ros1robot/cmdline.py +159 -0
  105. sierra/plugins/{platform/ros1gazebo → engine/ros1robot}/generators/__init__.py +4 -0
  106. sierra/plugins/engine/ros1robot/generators/engine.py +95 -0
  107. sierra/plugins/engine/ros1robot/plugin.py +410 -0
  108. sierra/plugins/{hpc/local → engine/ros1robot/variables}/__init__.py +5 -0
  109. sierra/plugins/engine/ros1robot/variables/population_size.py +146 -0
  110. sierra/plugins/execenv/__init__.py +11 -0
  111. sierra/plugins/execenv/hpc/__init__.py +18 -0
  112. sierra/plugins/execenv/hpc/adhoc/__init__.py +18 -0
  113. sierra/plugins/execenv/hpc/adhoc/cmdline.py +30 -0
  114. sierra/plugins/execenv/hpc/adhoc/plugin.py +131 -0
  115. sierra/plugins/execenv/hpc/cmdline.py +137 -0
  116. sierra/plugins/execenv/hpc/local/__init__.py +18 -0
  117. sierra/plugins/execenv/hpc/local/cmdline.py +31 -0
  118. sierra/plugins/execenv/hpc/local/plugin.py +145 -0
  119. sierra/plugins/execenv/hpc/pbs/__init__.py +18 -0
  120. sierra/plugins/execenv/hpc/pbs/cmdline.py +30 -0
  121. sierra/plugins/execenv/hpc/pbs/plugin.py +121 -0
  122. sierra/plugins/execenv/hpc/slurm/__init__.py +18 -0
  123. sierra/plugins/execenv/hpc/slurm/cmdline.py +30 -0
  124. sierra/plugins/execenv/hpc/slurm/plugin.py +133 -0
  125. sierra/plugins/execenv/prefectserver/__init__.py +18 -0
  126. sierra/plugins/execenv/prefectserver/cmdline.py +66 -0
  127. sierra/plugins/execenv/prefectserver/dockerremote/__init__.py +18 -0
  128. sierra/plugins/execenv/prefectserver/dockerremote/cmdline.py +66 -0
  129. sierra/plugins/execenv/prefectserver/dockerremote/plugin.py +132 -0
  130. sierra/plugins/execenv/prefectserver/flow.py +66 -0
  131. sierra/plugins/execenv/prefectserver/local/__init__.py +18 -0
  132. sierra/plugins/execenv/prefectserver/local/cmdline.py +29 -0
  133. sierra/plugins/execenv/prefectserver/local/plugin.py +133 -0
  134. sierra/plugins/{hpc/adhoc → execenv/robot}/__init__.py +1 -0
  135. sierra/plugins/execenv/robot/turtlebot3/__init__.py +18 -0
  136. sierra/plugins/execenv/robot/turtlebot3/plugin.py +204 -0
  137. sierra/plugins/expdef/__init__.py +14 -0
  138. sierra/plugins/expdef/json/__init__.py +14 -0
  139. sierra/plugins/expdef/json/plugin.py +504 -0
  140. sierra/plugins/expdef/xml/__init__.py +14 -0
  141. sierra/plugins/expdef/xml/plugin.py +386 -0
  142. sierra/{core/hpc → plugins/proc}/__init__.py +1 -1
  143. sierra/plugins/proc/collate/__init__.py +15 -0
  144. sierra/plugins/proc/collate/cmdline.py +47 -0
  145. sierra/plugins/proc/collate/plugin.py +271 -0
  146. sierra/plugins/proc/compress/__init__.py +18 -0
  147. sierra/plugins/proc/compress/cmdline.py +47 -0
  148. sierra/plugins/proc/compress/plugin.py +123 -0
  149. sierra/plugins/proc/decompress/__init__.py +18 -0
  150. sierra/plugins/proc/decompress/plugin.py +96 -0
  151. sierra/plugins/proc/imagize/__init__.py +15 -0
  152. sierra/plugins/proc/imagize/cmdline.py +49 -0
  153. sierra/plugins/proc/imagize/plugin.py +270 -0
  154. sierra/plugins/proc/modelrunner/__init__.py +16 -0
  155. sierra/plugins/proc/modelrunner/plugin.py +250 -0
  156. sierra/plugins/proc/statistics/__init__.py +15 -0
  157. sierra/plugins/proc/statistics/cmdline.py +64 -0
  158. sierra/plugins/proc/statistics/plugin.py +390 -0
  159. sierra/plugins/{hpc → prod}/__init__.py +1 -0
  160. sierra/plugins/prod/graphs/__init__.py +18 -0
  161. sierra/plugins/prod/graphs/cmdline.py +269 -0
  162. sierra/plugins/prod/graphs/collate.py +279 -0
  163. sierra/plugins/prod/graphs/inter/__init__.py +13 -0
  164. sierra/plugins/prod/graphs/inter/generate.py +83 -0
  165. sierra/plugins/prod/graphs/inter/heatmap.py +86 -0
  166. sierra/plugins/prod/graphs/inter/line.py +134 -0
  167. sierra/plugins/prod/graphs/intra/__init__.py +15 -0
  168. sierra/plugins/prod/graphs/intra/generate.py +202 -0
  169. sierra/plugins/prod/graphs/intra/heatmap.py +74 -0
  170. sierra/plugins/prod/graphs/intra/line.py +114 -0
  171. sierra/plugins/prod/graphs/plugin.py +103 -0
  172. sierra/plugins/prod/graphs/targets.py +63 -0
  173. sierra/plugins/prod/render/__init__.py +18 -0
  174. sierra/plugins/prod/render/cmdline.py +72 -0
  175. sierra/plugins/prod/render/plugin.py +282 -0
  176. sierra/plugins/storage/__init__.py +5 -0
  177. sierra/plugins/storage/arrow/__init__.py +18 -0
  178. sierra/plugins/storage/arrow/plugin.py +38 -0
  179. sierra/plugins/storage/csv/__init__.py +9 -0
  180. sierra/plugins/storage/csv/plugin.py +12 -5
  181. sierra/version.py +3 -2
  182. sierra_research-1.5.0.dist-info/METADATA +238 -0
  183. sierra_research-1.5.0.dist-info/RECORD +186 -0
  184. {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info}/WHEEL +1 -2
  185. sierra/core/experiment/xml.py +0 -454
  186. sierra/core/generators/controller_generator_parser.py +0 -34
  187. sierra/core/generators/exp_creator.py +0 -351
  188. sierra/core/generators/exp_generators.py +0 -142
  189. sierra/core/graphs/scatterplot2D.py +0 -109
  190. sierra/core/graphs/stacked_line_graph.py +0 -251
  191. sierra/core/graphs/stacked_surface_graph.py +0 -220
  192. sierra/core/graphs/summary_line_graph.py +0 -371
  193. sierra/core/hpc/cmdline.py +0 -142
  194. sierra/core/models/graphs.py +0 -87
  195. sierra/core/pipeline/stage2/exp_runner.py +0 -286
  196. sierra/core/pipeline/stage3/imagizer.py +0 -149
  197. sierra/core/pipeline/stage3/run_collator.py +0 -317
  198. sierra/core/pipeline/stage3/statistics_calculator.py +0 -478
  199. sierra/core/pipeline/stage4/graph_collator.py +0 -320
  200. sierra/core/pipeline/stage4/inter_exp_graph_generator.py +0 -240
  201. sierra/core/pipeline/stage4/intra_exp_graph_generator.py +0 -317
  202. sierra/core/pipeline/stage4/model_runner.py +0 -168
  203. sierra/core/pipeline/stage4/rendering.py +0 -283
  204. sierra/core/pipeline/stage4/yaml_config_loader.py +0 -103
  205. sierra/core/pipeline/stage5/inter_scenario_comparator.py +0 -328
  206. sierra/core/pipeline/stage5/intra_scenario_comparator.py +0 -989
  207. sierra/core/platform.py +0 -493
  208. sierra/core/plugin_manager.py +0 -369
  209. sierra/core/root_dirpath_generator.py +0 -241
  210. sierra/plugins/hpc/adhoc/plugin.py +0 -125
  211. sierra/plugins/hpc/local/plugin.py +0 -81
  212. sierra/plugins/hpc/pbs/__init__.py +0 -9
  213. sierra/plugins/hpc/pbs/plugin.py +0 -126
  214. sierra/plugins/hpc/slurm/__init__.py +0 -9
  215. sierra/plugins/hpc/slurm/plugin.py +0 -130
  216. sierra/plugins/platform/__init__.py +0 -9
  217. sierra/plugins/platform/argos/__init__.py +0 -9
  218. sierra/plugins/platform/argos/generators/platform_generators.py +0 -383
  219. sierra/plugins/platform/argos/plugin.py +0 -337
  220. sierra/plugins/platform/argos/variables/arena_shape.py +0 -145
  221. sierra/plugins/platform/argos/variables/cameras.py +0 -243
  222. sierra/plugins/platform/argos/variables/constant_density.py +0 -136
  223. sierra/plugins/platform/argos/variables/exp_setup.py +0 -113
  224. sierra/plugins/platform/argos/variables/population_constant_density.py +0 -175
  225. sierra/plugins/platform/argos/variables/population_size.py +0 -102
  226. sierra/plugins/platform/argos/variables/population_variable_density.py +0 -132
  227. sierra/plugins/platform/argos/variables/rendering.py +0 -104
  228. sierra/plugins/platform/ros1gazebo/__init__.py +0 -9
  229. sierra/plugins/platform/ros1gazebo/cmdline.py +0 -213
  230. sierra/plugins/platform/ros1gazebo/generators/platform_generators.py +0 -137
  231. sierra/plugins/platform/ros1gazebo/plugin.py +0 -335
  232. sierra/plugins/platform/ros1gazebo/variables/__init__.py +0 -10
  233. sierra/plugins/platform/ros1gazebo/variables/population_size.py +0 -204
  234. sierra/plugins/platform/ros1robot/__init__.py +0 -9
  235. sierra/plugins/platform/ros1robot/cmdline.py +0 -175
  236. sierra/plugins/platform/ros1robot/generators/platform_generators.py +0 -112
  237. sierra/plugins/platform/ros1robot/plugin.py +0 -373
  238. sierra/plugins/platform/ros1robot/variables/__init__.py +0 -10
  239. sierra/plugins/platform/ros1robot/variables/population_size.py +0 -146
  240. sierra/plugins/robot/__init__.py +0 -9
  241. sierra/plugins/robot/turtlebot3/__init__.py +0 -9
  242. sierra/plugins/robot/turtlebot3/plugin.py +0 -194
  243. sierra_research-1.3.11.data/data/share/man/man1/sierra-cli.1 +0 -2349
  244. sierra_research-1.3.11.data/data/share/man/man7/sierra-examples.7 +0 -508
  245. sierra_research-1.3.11.data/data/share/man/man7/sierra-exec-envs.7 +0 -331
  246. sierra_research-1.3.11.data/data/share/man/man7/sierra-glossary.7 +0 -285
  247. sierra_research-1.3.11.data/data/share/man/man7/sierra-platforms.7 +0 -358
  248. sierra_research-1.3.11.data/data/share/man/man7/sierra-usage.7 +0 -729
  249. sierra_research-1.3.11.data/data/share/man/man7/sierra.7 +0 -78
  250. sierra_research-1.3.11.dist-info/METADATA +0 -492
  251. sierra_research-1.3.11.dist-info/RECORD +0 -133
  252. sierra_research-1.3.11.dist-info/top_level.txt +0 -1
  253. {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info}/entry_points.txt +0 -0
  254. {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info/licenses}/LICENSE +0 -0
sierra/plugins/proc/collate/plugin.py
@@ -0,0 +1,271 @@
+ # Copyright 2019 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+
+ """
+ Classes for collating data within a :term:`Batch Experiment`.
+
+ Collation is the process of "lifting" data from :term:`Experimental Runs
+ <Experimental Run>` across all :term:`Experiments <Experiment>` in a
+ :term:`Batch Experiment` into a single file (a reduce operation). This is
+ needed to correctly calculate summary statistics for performance measures in
+ stage 3: you can't just run the calculated stddev through the calculations,
+ because comparing curves of stddev is not meaningful.
+ """
+
+ # Core packages
+ import multiprocessing as mp
+ import typing as tp
+ import queue
+ import logging
+ import pathlib
+
+ # 3rd party packages
+ import pandas as pd
+ import yaml
+
+ # Project packages
+ import sierra.core.variables.batch_criteria as bc
+ import sierra.core.plugin as pm
+ from sierra.core import types, storage, utils, config, batchroot
+ from sierra.core.pipeline.stage3 import gather
+
+ _logger = logging.getLogger(__name__)
+
+
+ def proc_batch_exp(
+     main_config: dict,
+     cmdopts: types.Cmdopts,
+     pathset: batchroot.PathSet,
+     criteria: bc.XVarBatchCriteria,
+ ) -> None:
+     """Generate :term:`Collated Output Data` files for each experiment.
+
+     :term:`Collated Output Data` files are generated from :term:`Raw Output
+     Data` files across :term:`Experimental Runs <Experimental Run>`.
+     Gathering is done in parallel across experiments for speed, unless
+     parallelism is disabled with ``--processing-parallelism``.
+     """
+     pool_opts = {}
+
+     pool_opts["parallelism"] = cmdopts["processing_parallelism"]
+
+     worker_opts = {
+         "project": cmdopts["project"],
+         "template_input_leaf": pathlib.Path(cmdopts["expdef_template"]).stem,
+         "df_verify": cmdopts["df_verify"],
+         "processing_mem_limit": cmdopts["processing_mem_limit"],
+         "storage": cmdopts["storage"],
+         "df_homogenize": cmdopts["df_homogenize"],
+         "project_config_root": cmdopts["project_config_root"],
+     }
+
+     exp_to_proc = utils.exp_range_calc(
+         cmdopts["exp_range"], pathset.output_root, criteria.gen_exp_names()
+     )
+
+     with mp.Pool(processes=pool_opts["parallelism"]) as pool:
+         _execute_for_batch(
+             main_config, pathset, exp_to_proc, worker_opts, pool_opts, pool
+         )
+
+
+ def _execute_for_batch(
+     main_config: types.YAMLDict,
+     pathset: batchroot.PathSet,
+     exp_to_proc: tp.List[pathlib.Path],
+     worker_opts: types.SimpleDict,
+     pool_opts: types.SimpleDict,
+     pool,
+ ) -> None:
+     m = mp.Manager()
+     gatherq = m.Queue()
+     processq = m.Queue()
+
+     for exp in exp_to_proc:
+         gatherq.put(exp)
+
+     _logger.debug(
+         "Starting %d gatherers, method=%s",
+         pool_opts["parallelism"],
+         mp.get_start_method(),
+     )
+
+     gathered = [
+         pool.apply_async(_gather_worker, (gatherq, processq, main_config, worker_opts))
+         for _ in range(0, pool_opts["parallelism"])
+     ]
+     _logger.debug("Waiting for gathering to finish")
+     for g in gathered:
+         g.get()
+
+     _logger.debug(
+         "Starting %d processors, method=%s",
+         pool_opts["parallelism"],
+         mp.get_start_method(),
+     )
+     processed = [
+         pool.apply_async(
+             _process_worker,
+             (processq, main_config, pathset.stat_interexp_root, worker_opts),
+         )
+         for _ in range(0, pool_opts["parallelism"])
+     ]
+
+     # Capture the otherwise silent crashes when something goes wrong in
+     # worker processes. Any assertions will show and any exceptions will be
+     # re-raised.
+     for p in processed:
+         p.get()
+
+     pool.close()
+     pool.join()
+     _logger.debug("Processing finished")
+
+
+ def _gather_worker(
+     gatherq: mp.Queue,
+     processq: mp.Queue,
+     main_config: types.YAMLDict,
+     gather_opts: types.SimpleDict,
+ ) -> None:
+     gatherer = ExpDataGatherer(main_config, gather_opts, processq)
+     while True:
+         # Wait 3 seconds after the queue is empty before bailing.
+         try:
+             exp_output_root = gatherq.get(True, 3)
+             gatherer(exp_output_root)
+             gatherq.task_done()
+
+         except queue.Empty:
+             break
+
+
+ def _process_worker(
+     processq: mp.Queue,
+     main_config: types.YAMLDict,
+     batch_stat_interexp_root: pathlib.Path,
+     process_opts: types.SimpleDict,
+ ) -> None:
+     while True:
+         # Wait 3 seconds after the queue is empty before bailing.
+         try:
+             spec = processq.get(True, 3)
+             _proc_single_exp(main_config, batch_stat_interexp_root, process_opts, spec)
+             processq.task_done()
+         except queue.Empty:
+             break
+
+
+ class ExpDataGatherer(gather.BaseGatherer):
+     """Gather :term:`Raw Output Data` files across all runs for :term:`Data Collation`.
+
+     The configured output directory for each run is searched recursively for
+     files to gather. To be eligible for gathering and later processing, files
+     must:
+
+     - Be non-empty.
+
+     - Have a suffix which is supported by the selected ``--storage`` plugin.
+
+     - Have a name (the last part of the absolute path, including extension)
+       which matches a configured :term:`Product` in a YAML file; e.g., a
+       graph from the :ref:`plugins/prod/graphs` plugin.
+     """
+
+     def __init__(self, *args, **kwargs) -> None:
+         super().__init__(*args, **kwargs)
+         self.logger = logging.getLogger(__name__)
+
+     def calc_gather_items(
+         self, run_output_root: pathlib.Path, exp_name: str
+     ) -> tp.List[gather.GatherSpec]:
+         to_gather = []
+         proj_output_root = run_output_root / str(self.run_metrics_leaf)
+         plugin = pm.pipeline.get_plugin_module(self.gather_opts["storage"])
+
+         config_path = pathlib.Path(
+             self.gather_opts["project_config_root"], config.kYAML.collate
+         )
+
+         try:
+             collate_config = yaml.load(utils.utf8open(config_path), yaml.FullLoader)
+
+         except FileNotFoundError:
+             self.logger.warning("%s does not exist!", config_path)
+             collate_config = {}
+
+         for item in proj_output_root.rglob("*"):
+             # Must be a file (duh)
+             if not item.is_file():
+                 continue
+
+             # Must be non-empty and have a suffix supported by the storage
+             # plugin.
+             if (
+                 not any(s in plugin.suffixes() for s in item.suffixes)
+                 or item.stat().st_size == 0
+             ):
+                 continue
+
+             # Any number of perf metrics can be configured, so look for a match.
+             files = collate_config["intra-exp"]
+             perf_confs = [f for f in files if f["file"] in item.name]
+             if not perf_confs:
+                 continue
+
+             # If we get a file match, then all the columns from that file
+             # should be added to the set of things to collate.
+             for conf in perf_confs:
+                 for col in conf["cols"]:
+                     to_gather.append(
+                         gather.GatherSpec(
+                             exp_name=exp_name,
+                             item_stem_path=item.relative_to(proj_output_root),
+                             collate_col=col,
+                         )
+                     )
+         return to_gather
+
+
+ def _proc_single_exp(
+     main_config: types.YAMLDict,
+     batch_stat_collate_root: pathlib.Path,
+     process_opts: types.SimpleDict,
+     spec: gather.ProcessSpec,
+ ) -> None:
+     """Collate :term:`Raw Output Data` files together (a reduce operation).
+
+     :term:`Raw Output Data` files gathered from N :term:`Experimental Runs
+     <Experimental Run>` are combined into a single :term:`Batch Summary Data`
+     file per :term:`Experiment`, with 1 column per run.
+     """
+     utils.dir_create_checked(batch_stat_collate_root, exist_ok=True)
+
+     collated = {}
+
+     key = (spec.gather.item_stem_path, spec.gather.collate_col)
+     collated[key] = pd.DataFrame(index=spec.dfs[0].index, columns=spec.exp_run_names)
+     for i, df in enumerate(spec.dfs):
+         assert (
+             spec.gather.collate_col in df.columns
+         ), f"{spec.gather.collate_col} not in {df.columns}"
+
+         collate_df = df[spec.gather.collate_col]
+         collated[key][spec.exp_run_names[i]] = collate_df
+
+     for file_path, col in collated:
+         df = utils.df_fill(collated[(file_path, col)], process_opts["df_homogenize"])
+         parent = batch_stat_collate_root / spec.gather.exp_name / file_path.parent
+         utils.dir_create_checked(parent, exist_ok=True)
+
+         # This preserves the directory structure of stuff in the per-run
+         # output root; if something is in a subdir there, it will show up in
+         # a subdir in the collated outputs too.
+         fname = f"{file_path.stem}-{col}" + config.kStorageExt["csv"]
+         storage.df_write(df, parent / fname, "storage.csv", index=False)
+
+
+ __all__ = [
+     "proc_batch_exp",
+     "ExpDataGatherer",
+ ]
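
The reduce performed by _proc_single_exp() above is easiest to see on a toy example. A minimal sketch (the run and column names here are hypothetical, not from the package):

    import pandas as pd

    # Raw output data: one dataframe per experimental run, each with the same
    # index (e.g., timesteps) and containing the column being collated.
    runs = {
        "run_0": pd.DataFrame({"cum_avg": [0.1, 0.2, 0.3]}),
        "run_1": pd.DataFrame({"cum_avg": [0.1, 0.3, 0.5]}),
        "run_2": pd.DataFrame({"cum_avg": [0.2, 0.2, 0.4]}),
    }

    # The reduce: a single dataframe with 1 column per run, from which summary
    # statistics across runs (mean, stddev, ...) can be computed per-index in
    # stage 3.
    collated = pd.DataFrame({name: df["cum_avg"] for name, df in runs.items()})
    print(collated.mean(axis=1))  # per-timestep mean across runs

This is why collation happens before statistics generation: the stddev of a column across runs can only be computed once all runs' columns sit side by side.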
sierra/plugins/proc/compress/__init__.py
@@ -0,0 +1,18 @@
+ # Copyright 2021 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ """
+ Container module for the processing plugin to compress data.
+
+ See :ref:`plugins/proc/compress`.
+ """
+
+ # Core packages
+
+ # 3rd party packages
+
+ # Project packages
+
+
+ def sierra_plugin_type() -> str:
+     return "pipeline"
sierra/plugins/proc/compress/cmdline.py
@@ -0,0 +1,47 @@
+ #
+ # Copyright 2025 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ #
+
+ # Core packages
+ import typing as tp
+ import argparse
+
+ # 3rd party packages
+
+ # Project packages
+ from sierra.core import types
+ from sierra.plugins import PluginCmdline
+
+
+ def build(
+     parents: tp.List[argparse.ArgumentParser], stages: tp.List[int]
+ ) -> PluginCmdline:
+     """
+     Get a cmdline parser supporting the ``proc.compress`` processing plugin.
+     """
+     cmdline = PluginCmdline(parents, stages)
+     cmdline.stage3.add_argument(
+         "--compress-remove-after",
+         action="store_true",
+         help="""
+              If the ``proc.compress`` plugin is run, remove the uncompressed
+              :term:`Raw Output Data` files after compression. This can save a
+              great deal of disk space. No data is lost, because everything
+              output by each :term:`Experimental Run` is in the compressed
+              archive.
+              """
+         + cmdline.stage_usage_doc([3]),
+         default=False,
+     )
+     return cmdline
+
+
+ def to_cmdopts(args: argparse.Namespace) -> types.Cmdopts:
+     return {
+         "compress_remove_after": args.compress_remove_after,
+     }
+
+
+ def sphinx_cmdline_stage3():
+     return build([], [3]).parser
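
How these three hooks fit together is not shown in the diff; a hedged sketch of driver code, assuming PluginCmdline.parser is a standard argparse.ArgumentParser (as sphinx_cmdline_stage3() suggests) and that arguments added to the stage3 group are attached to it:

    from sierra.plugins.proc.compress import cmdline as compress_cmdline

    # Hypothetical driver, approximating what SIERRA core presumably does
    # when it loads a processing plugin's cmdline module.
    cmd = compress_cmdline.build(parents=[], stages=[3])
    args = cmd.parser.parse_args(["--compress-remove-after"])

    # to_cmdopts() translates the argparse namespace into the plain dict
    # (types.Cmdopts) that the rest of the pipeline consumes.
    opts = compress_cmdline.to_cmdopts(args)
    assert opts["compress_remove_after"] is True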
sierra/plugins/proc/compress/plugin.py
@@ -0,0 +1,123 @@
+ # Copyright 2025 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ """
+ Plugin for compressing experiment data. Currently only works with .tar.gz files.
+ """
+
+ # Core packages
+ import multiprocessing as mp
+ import typing as tp
+ import logging
+ import pathlib
+ import shutil
+ import tarfile
+
+ # 3rd party packages
+
+ # Project packages
+ import sierra.core.variables.batch_criteria as bc
+ from sierra.core import types, utils, batchroot
+
+ _logger = logging.getLogger(__name__)
+
+
+ def proc_batch_exp(
+     main_config: types.YAMLDict,
+     cmdopts: types.Cmdopts,
+     pathset: batchroot.PathSet,
+     criteria: bc.XVarBatchCriteria,
+ ) -> None:
+     """
+     Compress data for each :term:`Experiment` in the :term:`Batch Experiment`.
+
+     Ideally this is done in parallel across experiments, but it can be
+     changed to serial via ``--processing-parallelism`` if memory on the
+     SIERRA host machine is limited.
+     """
+     exp_to_proc = utils.exp_range_calc(
+         cmdopts["exp_range"], pathset.output_root, criteria.gen_exp_names()
+     )
+
+     parallelism = cmdopts["processing_parallelism"]
+
+     tasks = []
+     run_metrics_leaf = main_config["sierra"]["run"]["run_metrics_leaf"]
+
+     for exp in exp_to_proc:
+         tasks.extend(
+             _build_tasklist_for_exp(
+                 pathset.output_root / exp.name,
+                 run_metrics_leaf,
+                 cmdopts["compress_remove_after"],
+             )
+         )
+
+     _logger.debug("Starting %s workers, method=%s", parallelism, mp.get_start_method())
+     with mp.Pool(processes=parallelism, maxtasksperchild=1) as pool:
+         processed = [pool.starmap_async(_worker, tasks)]
+         _logger.debug("Waiting for workers to finish")
+
+         for p in processed:
+             p.get()
+
+         pool.close()
+         pool.join()
+
+     _logger.debug("All workers finished")
+
+
+ def _build_tasklist_for_exp(
+     exp_output_root: pathlib.Path,
+     run_metrics_leaf: pathlib.Path,
+     remove_after: bool,
+ ) -> tp.List[tp.Tuple[pathlib.Path, pathlib.Path, bool]]:
+     """Add the root dir of each experimental run to the queue for processing.
+
+     Enqueueing for processing is done per-run rather than per-experiment, so
+     that systems with more CPUs than experiments still get maximum
+     throughput.
+     """
+     res = []
+     for exp in exp_output_root.iterdir():
+         res.append(
+             (
+                 exp_output_root,
+                 exp.relative_to(exp_output_root) / run_metrics_leaf,
+                 remove_after,
+             )
+         )
+
+     return res
+
+
+ def _worker(
+     exp_output_root: pathlib.Path, relpath: pathlib.Path, remove_after: bool
+ ) -> None:
+     """Compress the output directory for a single experimental run into a tarball.
+
+     Arguments:
+         exp_output_root: Output root for the :term:`Experiment`.
+
+         relpath: Path to the directory to compress, relative to the
+                  experiment root; the tarball is written alongside it.
+     """
+     if not (exp_output_root / relpath).exists():
+         _logger.warning(
+             "Cannot compress: %s does not exist", (exp_output_root / relpath)
+         )
+         return
+
+     with tarfile.open(
+         (exp_output_root / relpath).with_suffix(".tar.gz"), "w:gz"
+     ) as tar:
+         tar.add(
+             str(exp_output_root / relpath), arcname=relpath.relative_to(relpath.parent)
+         )
+
+     if remove_after:
+         shutil.rmtree(exp_output_root / relpath)
+
+
+ __all__ = ["proc_batch_exp"]
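
The compression in _worker() above is a plain stdlib pattern; a self-contained sketch with a hypothetical directory layout:

    import pathlib
    import shutil
    import tarfile

    run_root = pathlib.Path("exp0/run_0/metrics")  # hypothetical run output dir

    # Write exp0/run_0/metrics.tar.gz next to the directory. Storing entries
    # under the leaf name ("metrics/...") means extraction recreates the
    # directory in place.
    with tarfile.open(run_root.with_suffix(".tar.gz"), "w:gz") as tar:
        tar.add(str(run_root), arcname=run_root.name)

    shutil.rmtree(run_root)  # what --compress-remove-after opts into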
sierra/plugins/proc/decompress/__init__.py
@@ -0,0 +1,18 @@
+ # Copyright 2021 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ """
+ Container module for the processing plugin to decompress data.
+
+ See :ref:`plugins/proc/decompress`.
+ """
+
+ # Core packages
+
+ # 3rd party packages
+
+ # Project packages
+
+
+ def sierra_plugin_type() -> str:
+     return "pipeline"
sierra/plugins/proc/decompress/plugin.py
@@ -0,0 +1,96 @@
+ # Copyright 2025 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ """
+ Plugin for decompressing experiment data. Currently only works with .tar.gz files.
+ """
+
+ # Core packages
+ import multiprocessing as mp
+ import typing as tp
+ import logging
+ import pathlib
+ import tarfile
+
+ # 3rd party packages
+
+ # Project packages
+ import sierra.core.variables.batch_criteria as bc
+ from sierra.core import types, utils, batchroot
+
+ _logger = logging.getLogger(__name__)
+
+
+ def proc_batch_exp(
+     main_config: types.YAMLDict,
+     cmdopts: types.Cmdopts,
+     pathset: batchroot.PathSet,
+     criteria: bc.XVarBatchCriteria,
+ ) -> None:
+     """
+     Decompress data for each :term:`Experiment` in the :term:`Batch Experiment`.
+
+     Ideally this is done in parallel across experiments, but it can be
+     changed to serial via ``--processing-parallelism`` if memory on the
+     SIERRA host machine is limited.
+     """
+     exp_to_proc = utils.exp_range_calc(
+         cmdopts["exp_range"], pathset.output_root, criteria.gen_exp_names()
+     )
+
+     parallelism = cmdopts["processing_parallelism"]
+
+     tasks = []
+     for exp in exp_to_proc:
+         tasks.extend(_build_tasklist_for_exp(pathset.output_root / exp.name))
+
+     _logger.debug("Starting %s workers, method=%s", parallelism, mp.get_start_method())
+     with mp.Pool(processes=parallelism, maxtasksperchild=1) as pool:
+         processed = [pool.starmap_async(_worker, tasks)]
+         _logger.debug("Waiting for workers to finish")
+
+         for p in processed:
+             p.get()
+
+         pool.close()
+         pool.join()
+
+     _logger.debug("All workers finished")
+
+
+ def _build_tasklist_for_exp(
+     exp_output_root: pathlib.Path,
+ ) -> tp.List[tp.Tuple[pathlib.Path, pathlib.Path]]:
+     """Add all compressed files from an experiment to the queue for processing.
+
+     Enqueueing for processing is done at the file level rather than
+     per-experiment, so that systems with more CPUs than experiments still get
+     maximum throughput.
+     """
+     res = []
+     for f in exp_output_root.rglob("*.tar.gz"):
+         res.append((exp_output_root, f.relative_to(exp_output_root)))
+
+     return res
+
+
+ def _worker(exp_output_root: pathlib.Path, relpath: pathlib.Path) -> None:
+     """Decompress a single tarball from a single experiment.
+
+     Arguments:
+         exp_output_root: Output root for the :term:`Experiment`.
+
+         relpath: Path to the tarball, relative to the experiment root.
+     """
+     if not (exp_output_root / relpath).exists():
+         _logger.warning(
+             "Cannot decompress: %s does not exist", (exp_output_root / relpath)
+         )
+         return
+
+     with tarfile.open(exp_output_root / relpath, "r:gz") as tar:
+         tar.extractall(filter="data", path=str((exp_output_root / relpath).parent))
+
+
+ __all__ = ["proc_batch_exp"]
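
The matching extraction is likewise stdlib-only. Note the filter="data" argument above: it was added in Python 3.12 (and backported to maintenance releases of older 3.x lines) and rejects unsafe tarball members such as absolute paths and ".." traversal. A minimal sketch with a hypothetical path:

    import pathlib
    import tarfile

    tarball = pathlib.Path("exp0/run_0/metrics.tar.gz")  # hypothetical path

    # Recreates exp0/run_0/metrics/ next to the tarball; the "data" filter
    # sanitizes member names and permissions during extraction.
    with tarfile.open(tarball, "r:gz") as tar:
        tar.extractall(path=str(tarball.parent), filter="data")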
sierra/plugins/proc/imagize/__init__.py
@@ -0,0 +1,15 @@
+ #
+ # Copyright 2025 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ #
+ """Container module for the :term:`imagizing` data processing plugin."""
+
+ # Core packages
+
+ # 3rd party packages
+
+ # Project packages
+
+
+ def sierra_plugin_type() -> str:
+     return "pipeline"
sierra/plugins/proc/imagize/cmdline.py
@@ -0,0 +1,49 @@
+ #
+ # Copyright 2025 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ #
+
+ # Core packages
+ import typing as tp
+ import argparse
+
+ # 3rd party packages
+
+ # Project packages
+ from sierra.core import types
+ from sierra.plugins import PluginCmdline
+
+
+ def build(
+     parents: tp.List[argparse.ArgumentParser], stages: tp.List[int]
+ ) -> PluginCmdline:
+     """
+     Get a cmdline parser supporting the ``proc.imagize`` processing plugin.
+     """
+     cmdline = PluginCmdline(parents, stages)
+     cmdline.stage3.add_argument(
+         "--imagize-no-stats",
+         action="store_true",
+         help="""
+              If the ``proc.imagize`` plugin is run, don't run statistics
+              generation (i.e., assume it has already been run). This can save
+              a great deal of time for large imagizing workloads, or for
+              workloads where the memory limitations of the SIERRA host
+              machine require different levels of ``--processing-parallelism``
+              for statistics calculation and imagizing to avoid filling up
+              memory.
+              """
+         + cmdline.stage_usage_doc([3]),
+         default=False,
+     )
+     return cmdline
+
+
+ def to_cmdopts(args: argparse.Namespace) -> types.Cmdopts:
+     return {
+         "imagize_no_stats": args.imagize_no_stats,
+     }
+
+
+ def sphinx_cmdline_stage3():
+     return build([], [3]).parser