sierra-research 1.3.11__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. sierra/__init__.py +3 -3
  2. sierra/core/__init__.py +3 -3
  3. sierra/core/batchroot.py +223 -0
  4. sierra/core/cmdline.py +681 -1057
  5. sierra/core/compare.py +11 -0
  6. sierra/core/config.py +96 -88
  7. sierra/core/engine.py +306 -0
  8. sierra/core/execenv.py +380 -0
  9. sierra/core/expdef.py +11 -0
  10. sierra/core/experiment/__init__.py +1 -0
  11. sierra/core/experiment/bindings.py +150 -101
  12. sierra/core/experiment/definition.py +414 -245
  13. sierra/core/experiment/spec.py +83 -85
  14. sierra/core/exproot.py +44 -0
  15. sierra/core/generators/__init__.py +10 -0
  16. sierra/core/generators/experiment.py +528 -0
  17. sierra/core/generators/generator_factory.py +138 -137
  18. sierra/core/graphs/__init__.py +23 -0
  19. sierra/core/graphs/bcbridge.py +94 -0
  20. sierra/core/graphs/heatmap.py +245 -324
  21. sierra/core/graphs/pathset.py +27 -0
  22. sierra/core/graphs/schema.py +77 -0
  23. sierra/core/graphs/stacked_line.py +341 -0
  24. sierra/core/graphs/summary_line.py +506 -0
  25. sierra/core/logging.py +3 -2
  26. sierra/core/models/__init__.py +3 -1
  27. sierra/core/models/info.py +19 -0
  28. sierra/core/models/interface.py +52 -122
  29. sierra/core/pipeline/__init__.py +2 -5
  30. sierra/core/pipeline/pipeline.py +228 -126
  31. sierra/core/pipeline/stage1/__init__.py +10 -0
  32. sierra/core/pipeline/stage1/pipeline_stage1.py +45 -31
  33. sierra/core/pipeline/stage2/__init__.py +10 -0
  34. sierra/core/pipeline/stage2/pipeline_stage2.py +8 -11
  35. sierra/core/pipeline/stage2/runner.py +401 -0
  36. sierra/core/pipeline/stage3/__init__.py +12 -0
  37. sierra/core/pipeline/stage3/gather.py +321 -0
  38. sierra/core/pipeline/stage3/pipeline_stage3.py +37 -84
  39. sierra/core/pipeline/stage4/__init__.py +12 -2
  40. sierra/core/pipeline/stage4/pipeline_stage4.py +36 -354
  41. sierra/core/pipeline/stage5/__init__.py +12 -0
  42. sierra/core/pipeline/stage5/pipeline_stage5.py +33 -208
  43. sierra/core/pipeline/yaml.py +48 -0
  44. sierra/core/plugin.py +529 -62
  45. sierra/core/proc.py +11 -0
  46. sierra/core/prod.py +11 -0
  47. sierra/core/ros1/__init__.py +5 -1
  48. sierra/core/ros1/callbacks.py +22 -21
  49. sierra/core/ros1/cmdline.py +59 -88
  50. sierra/core/ros1/generators.py +159 -175
  51. sierra/core/ros1/variables/__init__.py +3 -0
  52. sierra/core/ros1/variables/exp_setup.py +122 -116
  53. sierra/core/startup.py +106 -76
  54. sierra/core/stat_kernels.py +4 -5
  55. sierra/core/storage.py +13 -32
  56. sierra/core/trampoline.py +30 -0
  57. sierra/core/types.py +116 -71
  58. sierra/core/utils.py +103 -106
  59. sierra/core/variables/__init__.py +1 -1
  60. sierra/core/variables/base_variable.py +12 -17
  61. sierra/core/variables/batch_criteria.py +387 -481
  62. sierra/core/variables/builtin.py +135 -0
  63. sierra/core/variables/exp_setup.py +19 -39
  64. sierra/core/variables/population_size.py +72 -76
  65. sierra/core/variables/variable_density.py +44 -68
  66. sierra/core/vector.py +1 -1
  67. sierra/main.py +256 -88
  68. sierra/plugins/__init__.py +119 -0
  69. sierra/plugins/compare/__init__.py +14 -0
  70. sierra/plugins/compare/graphs/__init__.py +19 -0
  71. sierra/plugins/compare/graphs/cmdline.py +120 -0
  72. sierra/plugins/compare/graphs/comparator.py +291 -0
  73. sierra/plugins/compare/graphs/inter_controller.py +531 -0
  74. sierra/plugins/compare/graphs/inter_scenario.py +297 -0
  75. sierra/plugins/compare/graphs/namecalc.py +53 -0
  76. sierra/plugins/compare/graphs/outputroot.py +73 -0
  77. sierra/plugins/compare/graphs/plugin.py +147 -0
  78. sierra/plugins/compare/graphs/preprocess.py +172 -0
  79. sierra/plugins/compare/graphs/schema.py +37 -0
  80. sierra/plugins/engine/__init__.py +14 -0
  81. sierra/plugins/engine/argos/__init__.py +18 -0
  82. sierra/plugins/{platform → engine}/argos/cmdline.py +144 -151
  83. sierra/plugins/{platform/argos/variables → engine/argos/generators}/__init__.py +5 -0
  84. sierra/plugins/engine/argos/generators/engine.py +394 -0
  85. sierra/plugins/engine/argos/plugin.py +393 -0
  86. sierra/plugins/{platform/argos/generators → engine/argos/variables}/__init__.py +5 -0
  87. sierra/plugins/engine/argos/variables/arena_shape.py +183 -0
  88. sierra/plugins/engine/argos/variables/cameras.py +240 -0
  89. sierra/plugins/engine/argos/variables/constant_density.py +112 -0
  90. sierra/plugins/engine/argos/variables/exp_setup.py +82 -0
  91. sierra/plugins/{platform → engine}/argos/variables/physics_engines.py +83 -87
  92. sierra/plugins/engine/argos/variables/population_constant_density.py +178 -0
  93. sierra/plugins/engine/argos/variables/population_size.py +115 -0
  94. sierra/plugins/engine/argos/variables/population_variable_density.py +123 -0
  95. sierra/plugins/engine/argos/variables/rendering.py +108 -0
  96. sierra/plugins/engine/ros1gazebo/__init__.py +18 -0
  97. sierra/plugins/engine/ros1gazebo/cmdline.py +175 -0
  98. sierra/plugins/{platform/ros1robot → engine/ros1gazebo}/generators/__init__.py +5 -0
  99. sierra/plugins/engine/ros1gazebo/generators/engine.py +125 -0
  100. sierra/plugins/engine/ros1gazebo/plugin.py +404 -0
  101. sierra/plugins/engine/ros1gazebo/variables/__init__.py +15 -0
  102. sierra/plugins/engine/ros1gazebo/variables/population_size.py +214 -0
  103. sierra/plugins/engine/ros1robot/__init__.py +18 -0
  104. sierra/plugins/engine/ros1robot/cmdline.py +159 -0
  105. sierra/plugins/{platform/ros1gazebo → engine/ros1robot}/generators/__init__.py +4 -0
  106. sierra/plugins/engine/ros1robot/generators/engine.py +95 -0
  107. sierra/plugins/engine/ros1robot/plugin.py +410 -0
  108. sierra/plugins/{hpc/local → engine/ros1robot/variables}/__init__.py +5 -0
  109. sierra/plugins/engine/ros1robot/variables/population_size.py +146 -0
  110. sierra/plugins/execenv/__init__.py +11 -0
  111. sierra/plugins/execenv/hpc/__init__.py +18 -0
  112. sierra/plugins/execenv/hpc/adhoc/__init__.py +18 -0
  113. sierra/plugins/execenv/hpc/adhoc/cmdline.py +30 -0
  114. sierra/plugins/execenv/hpc/adhoc/plugin.py +131 -0
  115. sierra/plugins/execenv/hpc/cmdline.py +137 -0
  116. sierra/plugins/execenv/hpc/local/__init__.py +18 -0
  117. sierra/plugins/execenv/hpc/local/cmdline.py +31 -0
  118. sierra/plugins/execenv/hpc/local/plugin.py +145 -0
  119. sierra/plugins/execenv/hpc/pbs/__init__.py +18 -0
  120. sierra/plugins/execenv/hpc/pbs/cmdline.py +30 -0
  121. sierra/plugins/execenv/hpc/pbs/plugin.py +121 -0
  122. sierra/plugins/execenv/hpc/slurm/__init__.py +18 -0
  123. sierra/plugins/execenv/hpc/slurm/cmdline.py +30 -0
  124. sierra/plugins/execenv/hpc/slurm/plugin.py +133 -0
  125. sierra/plugins/execenv/prefectserver/__init__.py +18 -0
  126. sierra/plugins/execenv/prefectserver/cmdline.py +66 -0
  127. sierra/plugins/execenv/prefectserver/dockerremote/__init__.py +18 -0
  128. sierra/plugins/execenv/prefectserver/dockerremote/cmdline.py +66 -0
  129. sierra/plugins/execenv/prefectserver/dockerremote/plugin.py +132 -0
  130. sierra/plugins/execenv/prefectserver/flow.py +66 -0
  131. sierra/plugins/execenv/prefectserver/local/__init__.py +18 -0
  132. sierra/plugins/execenv/prefectserver/local/cmdline.py +29 -0
  133. sierra/plugins/execenv/prefectserver/local/plugin.py +133 -0
  134. sierra/plugins/{hpc/adhoc → execenv/robot}/__init__.py +1 -0
  135. sierra/plugins/execenv/robot/turtlebot3/__init__.py +18 -0
  136. sierra/plugins/execenv/robot/turtlebot3/plugin.py +204 -0
  137. sierra/plugins/expdef/__init__.py +14 -0
  138. sierra/plugins/expdef/json/__init__.py +14 -0
  139. sierra/plugins/expdef/json/plugin.py +504 -0
  140. sierra/plugins/expdef/xml/__init__.py +14 -0
  141. sierra/plugins/expdef/xml/plugin.py +386 -0
  142. sierra/{core/hpc → plugins/proc}/__init__.py +1 -1
  143. sierra/plugins/proc/collate/__init__.py +15 -0
  144. sierra/plugins/proc/collate/cmdline.py +47 -0
  145. sierra/plugins/proc/collate/plugin.py +271 -0
  146. sierra/plugins/proc/compress/__init__.py +18 -0
  147. sierra/plugins/proc/compress/cmdline.py +47 -0
  148. sierra/plugins/proc/compress/plugin.py +123 -0
  149. sierra/plugins/proc/decompress/__init__.py +18 -0
  150. sierra/plugins/proc/decompress/plugin.py +96 -0
  151. sierra/plugins/proc/imagize/__init__.py +15 -0
  152. sierra/plugins/proc/imagize/cmdline.py +49 -0
  153. sierra/plugins/proc/imagize/plugin.py +270 -0
  154. sierra/plugins/proc/modelrunner/__init__.py +16 -0
  155. sierra/plugins/proc/modelrunner/plugin.py +250 -0
  156. sierra/plugins/proc/statistics/__init__.py +15 -0
  157. sierra/plugins/proc/statistics/cmdline.py +64 -0
  158. sierra/plugins/proc/statistics/plugin.py +390 -0
  159. sierra/plugins/{hpc → prod}/__init__.py +1 -0
  160. sierra/plugins/prod/graphs/__init__.py +18 -0
  161. sierra/plugins/prod/graphs/cmdline.py +269 -0
  162. sierra/plugins/prod/graphs/collate.py +279 -0
  163. sierra/plugins/prod/graphs/inter/__init__.py +13 -0
  164. sierra/plugins/prod/graphs/inter/generate.py +83 -0
  165. sierra/plugins/prod/graphs/inter/heatmap.py +86 -0
  166. sierra/plugins/prod/graphs/inter/line.py +134 -0
  167. sierra/plugins/prod/graphs/intra/__init__.py +15 -0
  168. sierra/plugins/prod/graphs/intra/generate.py +202 -0
  169. sierra/plugins/prod/graphs/intra/heatmap.py +74 -0
  170. sierra/plugins/prod/graphs/intra/line.py +114 -0
  171. sierra/plugins/prod/graphs/plugin.py +103 -0
  172. sierra/plugins/prod/graphs/targets.py +63 -0
  173. sierra/plugins/prod/render/__init__.py +18 -0
  174. sierra/plugins/prod/render/cmdline.py +72 -0
  175. sierra/plugins/prod/render/plugin.py +282 -0
  176. sierra/plugins/storage/__init__.py +5 -0
  177. sierra/plugins/storage/arrow/__init__.py +18 -0
  178. sierra/plugins/storage/arrow/plugin.py +38 -0
  179. sierra/plugins/storage/csv/__init__.py +9 -0
  180. sierra/plugins/storage/csv/plugin.py +12 -5
  181. sierra/version.py +3 -2
  182. sierra_research-1.5.0.dist-info/METADATA +238 -0
  183. sierra_research-1.5.0.dist-info/RECORD +186 -0
  184. {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info}/WHEEL +1 -2
  185. sierra/core/experiment/xml.py +0 -454
  186. sierra/core/generators/controller_generator_parser.py +0 -34
  187. sierra/core/generators/exp_creator.py +0 -351
  188. sierra/core/generators/exp_generators.py +0 -142
  189. sierra/core/graphs/scatterplot2D.py +0 -109
  190. sierra/core/graphs/stacked_line_graph.py +0 -251
  191. sierra/core/graphs/stacked_surface_graph.py +0 -220
  192. sierra/core/graphs/summary_line_graph.py +0 -371
  193. sierra/core/hpc/cmdline.py +0 -142
  194. sierra/core/models/graphs.py +0 -87
  195. sierra/core/pipeline/stage2/exp_runner.py +0 -286
  196. sierra/core/pipeline/stage3/imagizer.py +0 -149
  197. sierra/core/pipeline/stage3/run_collator.py +0 -317
  198. sierra/core/pipeline/stage3/statistics_calculator.py +0 -478
  199. sierra/core/pipeline/stage4/graph_collator.py +0 -320
  200. sierra/core/pipeline/stage4/inter_exp_graph_generator.py +0 -240
  201. sierra/core/pipeline/stage4/intra_exp_graph_generator.py +0 -317
  202. sierra/core/pipeline/stage4/model_runner.py +0 -168
  203. sierra/core/pipeline/stage4/rendering.py +0 -283
  204. sierra/core/pipeline/stage4/yaml_config_loader.py +0 -103
  205. sierra/core/pipeline/stage5/inter_scenario_comparator.py +0 -328
  206. sierra/core/pipeline/stage5/intra_scenario_comparator.py +0 -989
  207. sierra/core/platform.py +0 -493
  208. sierra/core/plugin_manager.py +0 -369
  209. sierra/core/root_dirpath_generator.py +0 -241
  210. sierra/plugins/hpc/adhoc/plugin.py +0 -125
  211. sierra/plugins/hpc/local/plugin.py +0 -81
  212. sierra/plugins/hpc/pbs/__init__.py +0 -9
  213. sierra/plugins/hpc/pbs/plugin.py +0 -126
  214. sierra/plugins/hpc/slurm/__init__.py +0 -9
  215. sierra/plugins/hpc/slurm/plugin.py +0 -130
  216. sierra/plugins/platform/__init__.py +0 -9
  217. sierra/plugins/platform/argos/__init__.py +0 -9
  218. sierra/plugins/platform/argos/generators/platform_generators.py +0 -383
  219. sierra/plugins/platform/argos/plugin.py +0 -337
  220. sierra/plugins/platform/argos/variables/arena_shape.py +0 -145
  221. sierra/plugins/platform/argos/variables/cameras.py +0 -243
  222. sierra/plugins/platform/argos/variables/constant_density.py +0 -136
  223. sierra/plugins/platform/argos/variables/exp_setup.py +0 -113
  224. sierra/plugins/platform/argos/variables/population_constant_density.py +0 -175
  225. sierra/plugins/platform/argos/variables/population_size.py +0 -102
  226. sierra/plugins/platform/argos/variables/population_variable_density.py +0 -132
  227. sierra/plugins/platform/argos/variables/rendering.py +0 -104
  228. sierra/plugins/platform/ros1gazebo/__init__.py +0 -9
  229. sierra/plugins/platform/ros1gazebo/cmdline.py +0 -213
  230. sierra/plugins/platform/ros1gazebo/generators/platform_generators.py +0 -137
  231. sierra/plugins/platform/ros1gazebo/plugin.py +0 -335
  232. sierra/plugins/platform/ros1gazebo/variables/__init__.py +0 -10
  233. sierra/plugins/platform/ros1gazebo/variables/population_size.py +0 -204
  234. sierra/plugins/platform/ros1robot/__init__.py +0 -9
  235. sierra/plugins/platform/ros1robot/cmdline.py +0 -175
  236. sierra/plugins/platform/ros1robot/generators/platform_generators.py +0 -112
  237. sierra/plugins/platform/ros1robot/plugin.py +0 -373
  238. sierra/plugins/platform/ros1robot/variables/__init__.py +0 -10
  239. sierra/plugins/platform/ros1robot/variables/population_size.py +0 -146
  240. sierra/plugins/robot/__init__.py +0 -9
  241. sierra/plugins/robot/turtlebot3/__init__.py +0 -9
  242. sierra/plugins/robot/turtlebot3/plugin.py +0 -194
  243. sierra_research-1.3.11.data/data/share/man/man1/sierra-cli.1 +0 -2349
  244. sierra_research-1.3.11.data/data/share/man/man7/sierra-examples.7 +0 -508
  245. sierra_research-1.3.11.data/data/share/man/man7/sierra-exec-envs.7 +0 -331
  246. sierra_research-1.3.11.data/data/share/man/man7/sierra-glossary.7 +0 -285
  247. sierra_research-1.3.11.data/data/share/man/man7/sierra-platforms.7 +0 -358
  248. sierra_research-1.3.11.data/data/share/man/man7/sierra-usage.7 +0 -729
  249. sierra_research-1.3.11.data/data/share/man/man7/sierra.7 +0 -78
  250. sierra_research-1.3.11.dist-info/METADATA +0 -492
  251. sierra_research-1.3.11.dist-info/RECORD +0 -133
  252. sierra_research-1.3.11.dist-info/top_level.txt +0 -1
  253. {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info}/entry_points.txt +0 -0
  254. {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,321 @@
1
+ # Copyright 2019 John Harwell, All rights reserved.
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """
6
+ Classes for gathering :term:`Raw Output Data` files in a batch.
7
+ """
8
+
9
+ # Core packages
10
+ import re
11
+ import multiprocessing as mp
12
+ import typing as tp
13
+ import time
14
+ import datetime
15
+ import logging
16
+ import pathlib
17
+
18
+ # 3rd party packages
19
+ import psutil
20
+ import pandas as pd # noqa
21
+
22
+ # Project packages
23
+ from sierra.core import types, utils, storage
24
+
25
+
26
class GatherSpec:
    """
    Data class for specifying files to gather from an :term:`Experiment`.

    Attributes:
        item_stem_path: The name of the file to gather from all runs in an
                        experiment, relative to the output root for the run (to
                        support nested outputs).

        exp_name: The name of the parent experiment.

        collate_col: The name of the column associated with the file, as
                     configured.  Will be None for statistics generation, and
                     non-None for collation.
    """

    def __init__(
        self,
        exp_name: str,
        item_stem_path: pathlib.Path,
        collate_col: tp.Union[str, None],
    ) -> None:
        self.exp_name = exp_name
        self.item_stem_path = item_stem_path
        self.collate_col = collate_col

    def __repr__(self) -> str:
        # Deliberately omits collate_col: the representation is only
        # "<experiment name>: <item path>".
        return f"{self.exp_name}: {self.item_stem_path}"
55
+
56
+
57
class ProcessSpec:
    """
    Data class for specifying how to process :term:`Raw Output Files`.

    Attributes:
        gather: The specification for how the files were gathered.

        exp_run_names: The names of the parent experimental runs.

        dfs: The gathered dataframes.  Indices match those in
             ``exp_run_names``.
    """

    def __init__(self, gather: GatherSpec) -> None:
        self.gather = gather

        # Both lists start empty and are filled in lockstep by whoever does the
        # gathering, so that dfs[i] is the data from exp_run_names[i].
        self.exp_run_names = []  # type: tp.List[str]
        self.dfs = []  # type: tp.List[pd.DataFrame]
74
+
75
+
76
class BaseGatherer:
    """Gather a set of output files from all runs in an experiment.

    "Gathering" in this context means creating a dictionary mapping which files
    came from where, so that later processing can be both across and within
    experiments in the batch.

    This base class does not know *which* files to gather: derived classes
    must override :meth:`calc_gather_items`.
    """

    def __init__(
        self,
        main_config: types.YAMLDict,
        gather_opts: types.SimpleDict,
        processq: mp.Queue,
    ) -> None:
        """
        Arguments:
            main_config: Parsed main configuration; the
                         ``sierra.run.run_metrics_leaf`` key is read from it.

            gather_opts: Options controlling gathering.  The keys read by this
                         class are ``template_input_leaf``, ``df_verify``,
                         ``storage`` and ``processing_mem_limit``.

            processq: Queue onto which each gathered :class:`ProcessSpec` is
                      put.
        """
        self.processq = processq
        self.gather_opts = gather_opts

        # Will get the main name and extension of the config file (without the
        # full absolute path).
        self.template_input_fname = self.gather_opts["template_input_leaf"]
        self.main_config = main_config
        self.run_metrics_leaf = main_config["sierra"]["run"]["run_metrics_leaf"]

        self.logger = logging.getLogger(__name__)

    def calc_gather_items(
        self, run_output_root: pathlib.Path, exp_name: str
    ) -> tp.List[GatherSpec]:
        """Calculate the items to gather from a single experimental run.

        Must be overridden by derived classes; the base implementation always
        raises :class:`NotImplementedError`.
        """
        raise NotImplementedError

    def __call__(self, exp_output_root: pathlib.Path) -> None:
        """Process the output files found in the output save path.

        Optionally verifies the experiment outputs (``df_verify``), computes
        what to gather from every run directory, gathers each item from all
        runs, and enqueues the result on the process queue.

        Raises:
            AssertionError: If ``exp_output_root`` contains entries whose names
                            do not look like experimental run output
                            directories.
        """
        if self.gather_opts["df_verify"]:
            self._verify_exp_outputs(exp_output_root)

        self.logger.info(
            "Gathering raw outputs from %s...",
            exp_output_root.relative_to(exp_output_root.parent.parent),
        )

        pattern = "{}_run{}_output".format(
            re.escape(str(self.gather_opts["template_input_leaf"])), r"\d+"
        )

        runs = list(exp_output_root.iterdir())
        assert all(re.match(pattern, r.name) for r in runs), (
            f"Extra files/not all dirs in '{exp_output_root}' are exp "
            "run output dirs"
        )

        to_gather = []
        for run in runs:
            from_run = self.calc_gather_items(run, exp_output_root.name)
            self.logger.trace(
                "Calculated %s items from %s for gathering", len(from_run), run.name
            )
            to_gather.extend(from_run)
        self.logger.trace("Gathering all items...")

        for spec in to_gather:
            # Throttle: don't read more data while memory is scarce.
            self._wait_for_memory()
            to_process = self._gather_item_from_runs(exp_output_root, spec, runs)
            n_gathered_from = len(to_process.dfs)
            if n_gathered_from != len(runs):
                self.logger.warning(
                    (
                        "Data not gathered for %s from all experimental runs "
                        "in %s: %s runs != %s (--n-runs)"
                    ),
                    spec.item_stem_path,
                    exp_output_root.relative_to(exp_output_root.parent.parent),
                    n_gathered_from,
                    len(runs),
                )

            # Put gathered files in the process queue
            self.processq.put(to_process)

        self.logger.debug(
            "Enqueued %s items from %s for processing",
            len(to_gather),
            exp_output_root.name,
        )

    def _gather_item_from_runs(
        self,
        exp_output_root: pathlib.Path,
        spec: GatherSpec,
        runs: tp.List[pathlib.Path],
    ) -> ProcessSpec:
        """Read the file named by ``spec`` from every run which produced it.

        Runs for which the file is missing or empty are skipped, so the
        returned spec may hold fewer dataframes than there are runs.
        Non-numeric columns are dropped (with a warning) from what is read.
        """
        to_process = ProcessSpec(gather=spec)

        for run in runs:
            path = run / self.run_metrics_leaf / spec.item_stem_path
            if not (path.exists() and path.stat().st_size > 0):
                continue

            df = storage.df_read(
                path,
                self.gather_opts["storage"],
                run_output_root=run,
                index_col=False,
            )
            if nonumeric := df.select_dtypes(exclude="number").columns.tolist():
                self.logger.warning(
                    "Non-numeric columns are not supported: dropping %s from %s",
                    nonumeric,
                    path.relative_to(exp_output_root),
                )
                df = df.drop(columns=nonumeric)

            # Indices here must match so that the appropriate data from each run
            # are matched with the name of the run in collated performance data.
            to_process.exp_run_names.append(run.name)
            to_process.dfs.append(df)

        return to_process

    def _wait_for_memory(self) -> None:
        """Block until enough system memory is available.

        Polls once per second until the percentage of available memory is at
        least ``100 - gather_opts["processing_mem_limit"]``.
        """
        while True:
            mem = psutil.virtual_memory()
            avail = mem.available / mem.total
            free_percent = avail * 100
            free_limit = 100 - self.gather_opts["processing_mem_limit"]

            if free_percent >= free_limit:
                return

            self.logger.info(
                "Waiting for memory: avail=%s,min=%s", free_percent, free_limit
            )
            time.sleep(1)

    def _verify_exp_outputs(self, exp_output_root: pathlib.Path) -> None:
        """
        Verify the integrity of all runs in an experiment.

        Every run is compared against every other run.  Specifically:

        - All runs produced all CSV files.

        - All runs CSV files with the same name have the same # rows and
          columns.

        NOTE(review): an earlier version of this docstring also promised that
        no CSV files contain NaNs; no such check is performed here or in
        :meth:`_verify_exp_outputs_pairwise`.
        """
        # Materialize the listing: iterdir() returns a one-shot iterator, and
        # sharing it between the two nested loops below would let the inner
        # loop exhaust it so that only the first run is ever compared.
        run_roots = list(exp_output_root.iterdir())

        self.logger.info("Verifying results in %s...", exp_output_root.name)

        start = time.time()

        for run1 in run_roots:
            ofile_root1 = run1 / str(self.run_metrics_leaf)

            for run2 in run_roots:
                # Comparing a run against itself cannot fail; skip it.
                if run2 == run1:
                    continue

                ofile_root2 = run2 / str(self.run_metrics_leaf)

                if not ofile_root2.is_dir():
                    continue

                self._verify_exp_outputs_pairwise(
                    exp_output_root, ofile_root1, ofile_root2
                )

        elapsed = int(time.time() - start)
        sec = datetime.timedelta(seconds=elapsed)
        self.logger.info(
            "Done verifying results in <batch_output_root>/%s: %s",
            exp_output_root.name,
            sec,
        )

    def _verify_exp_outputs_pairwise(
        self,
        exp_output_root: pathlib.Path,
        ofile_root1: pathlib.Path,
        ofile_root2: pathlib.Path,
    ) -> None:
        """Check every output file under ``ofile_root1`` against ``ofile_root2``.

        The check is one-directional: only files found under ``ofile_root1``
        are looked up under ``ofile_root2``.

        Raises:
            AssertionError: If a file is missing from either root, or the two
                            dataframes differ in column set or length.
        """
        for ofile in ofile_root1.rglob("*"):
            path1 = ofile
            # rglob() descends into subdirectories, so the counterpart must be
            # located by the path relative to the root, not just the leaf name.
            path2 = ofile_root2 / ofile.relative_to(ofile_root1)

            # If either path is a directory, that directory MIGHT contain
            # imagizing data. We use the following heuristic:
            #
            # If the directory only contains files AND all the files have the
            # same extension AND all the files contain the directory name, we
            # conclude that the directory contains imagizing data and skip it.
            #
            # Otherwise, check it, as projects/engines can output their data in
            # a directory tree, and we want to verify that.
            if (
                path1.is_dir()
                and path2.is_dir()
                and all(f.is_file() and path1.name in f.name for f in path1.iterdir())
                and all(f.is_file() and path2.name in f.name for f in path2.iterdir())
            ):
                self.logger.debug(
                    (
                        "Not verifying {<exp_output_root>/%s,<exp_output_root>/%s} pairwise: "
                        "contains data for imagizing"
                    ),
                    path1.relative_to(exp_output_root),
                    path2.relative_to(exp_output_root),
                )
                continue

            # Any other directory is not itself a data file; its contents are
            # visited individually by rglob().
            if path1.is_dir() or path2.is_dir():
                continue

            # Files named after their parent directory are treated as imagizing
            # data and are not verified.
            if path1.parent.name in path1.name or path2.parent.name in path2.name:
                self.logger.trace(
                    (
                        "Not verifying {<exp_output_root>/%s,<exp_output_root>/%s} pairwise: "
                        "imagizing data"
                    ),
                    path1.relative_to(exp_output_root),
                    path2.relative_to(exp_output_root),
                )
                continue

            assert utils.path_exists(path1) and utils.path_exists(
                path2
            ), f"Either {path1} or {path2} does not exist"

            # Verify both dataframes have same # columns, and that
            # column sets are identical
            df1 = storage.df_read(path1, self.gather_opts["storage"])
            df2 = storage.df_read(path2, self.gather_opts["storage"])

            assert len(df1.columns) == len(
                df2.columns
            ), f"Dataframes from {path1} and {path2} do not have the same # columns"
            assert sorted(df1.columns) == sorted(
                df2.columns
            ), f"Columns from {path1} and {path2} not identical"

            # Verify the length of all columns in both dataframes is the same
            for c1 in df1.columns:
                assert all(
                    len(df1[c1]) == len(df1[c2]) for c2 in df1.columns
                ), f"Not all columns from {path1} have same length"

                assert all(
                    len(df1[c1]) == len(df2[c2]) for c2 in df1.columns
                ), f"Not all columns from {path1} and {path2} have the same length"
319
+
320
+
321
+ __all__ = ["GatherSpec", "BaseGatherer"]
@@ -2,25 +2,19 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
4
 
5
- """Stage 3 of the experimental pipeline: processing experimental results.
6
-
7
- """
5
+ """Stage 3 of the experimental pipeline: processing experimental results."""
8
6
 
9
7
  # Core packages
10
8
  import time
11
9
  import datetime
12
10
  import logging
13
- import pathlib
14
11
 
15
12
  # 3rd party packages
16
- import yaml
17
13
 
18
14
  # Project packages
19
- from sierra.core.pipeline.stage3.statistics_calculator import BatchExpParallelCalculator
20
- from sierra.core.pipeline.stage3.run_collator import ExperimentalRunParallelCollator
21
- from sierra.core.pipeline.stage3.imagizer import BatchExpParallelImagizer
22
15
  import sierra.core.variables.batch_criteria as bc
23
- from sierra.core import types, utils
16
+ from sierra.core import types, batchroot
17
+ import sierra.core.plugin as pm
24
18
 
25
19
 
26
20
  class PipelineStage3:
@@ -28,91 +22,50 @@ class PipelineStage3:
28
22
 
29
23
  Currently this includes:
30
24
 
31
- - Generating statistics from results for generating per-experiment graphs
32
- during stage 4. This can generate :term:`Averaged .csv` files, among
33
- other statistics.
25
+ - Generating statistics from results for generating per-experiment
26
+ graphs during stage 4. This can generate :term:`Processed Output
27
+ Data` files, among other statistics.
34
28
 
35
- - Collating results across experiments for generating inter-experiment
36
- graphs during stage 4.
29
+ - Collating results across experiments for generating inter-experiment
30
+ graphs during stage 4.
37
31
 
38
- - Generating image files from project metric collection for later use in
39
- video rendering in stage 4.
32
+ - Generating image files from project metric collection for later use in
33
+ video rendering in stage 4.
40
34
 
41
35
  This stage is idempotent.
42
-
43
36
  """
44
37
 
45
- def __init__(self, main_config: dict, cmdopts: types.Cmdopts) -> None:
38
+ def __init__(
39
+ self,
40
+ main_config: types.YAMLDict,
41
+ cmdopts: types.Cmdopts,
42
+ pathset: batchroot.PathSet,
43
+ ) -> None:
46
44
  self.logger = logging.getLogger(__name__)
47
45
  self.main_config = main_config
48
46
  self.cmdopts = cmdopts
47
+ self.pathset = pathset
48
+
49
+ def run(self, criteria: bc.XVarBatchCriteria) -> None:
50
+ spec = self.cmdopts["proc"]
51
+ self.logger.info(
52
+ "Processing data with %s processing plugins: %s", len(spec), spec
53
+ )
54
+ for s in spec:
55
+ module = pm.pipeline.get_plugin_module(s)
56
+ self.logger.info(
57
+ "Running %s in <batchroot>/%s",
58
+ s,
59
+ self.pathset.output_root.relative_to(self.pathset.root),
60
+ )
49
61
 
50
- def run(self, criteria: bc.IConcreteBatchCriteria) -> None:
51
- self._run_statistics(self.main_config, self.cmdopts, criteria)
52
- self._run_run_collation(self.main_config, self.cmdopts, criteria)
53
-
54
- if self.cmdopts['project_imagizing']:
55
- intra_HM_path = pathlib.Path(self.cmdopts['project_config_root']) \
56
- / pathlib.Path('intra-graphs-hm.yaml')
57
-
58
- if utils.path_exists(intra_HM_path):
59
- self.logger.info(("Loading intra-experiment heatmap config for "
60
- "project '%s'"),
61
- self.cmdopts['project'])
62
- intra_HM_config = yaml.load(utils.utf8open(intra_HM_path),
63
- yaml.FullLoader)
64
- self._run_imagizing(self.main_config,
65
- intra_HM_config,
66
- self.cmdopts,
67
- criteria)
68
-
69
- else:
70
- self.logger.warning("%s does not exist--cannot imagize",
71
- intra_HM_path)
72
-
73
- # Private functions
74
-
75
- def _run_statistics(self,
76
- main_config: dict,
77
- cmdopts: types.Cmdopts, criteria:
78
- bc.IConcreteBatchCriteria):
79
- self.logger.info("Generating statistics from experiment outputs in %s...",
80
- cmdopts['batch_output_root'])
81
- start = time.time()
82
- BatchExpParallelCalculator(main_config, cmdopts)(criteria)
83
- elapsed = int(time.time() - start)
84
- sec = datetime.timedelta(seconds=elapsed)
85
- self.logger.info("Statistics generation complete in %s", str(sec))
86
-
87
- def _run_run_collation(self,
88
- main_config: dict,
89
- cmdopts: types.Cmdopts, criteria:
90
- bc.IConcreteBatchCriteria):
91
- if not self.cmdopts['skip_collate']:
92
- self.logger.info("Collating experiment run outputs into %s...",
93
- cmdopts['batch_stat_collate_root'])
94
62
  start = time.time()
95
- ExperimentalRunParallelCollator(main_config, cmdopts)(criteria)
63
+ module.proc_batch_exp(
64
+ self.main_config, self.cmdopts, self.pathset, criteria
65
+ )
96
66
  elapsed = int(time.time() - start)
97
67
  sec = datetime.timedelta(seconds=elapsed)
98
- self.logger.info(
99
- "Experimental run output collation complete in %s", str(sec))
100
-
101
- def _run_imagizing(self,
102
- main_config: dict,
103
- intra_HM_config: dict,
104
- cmdopts: types.Cmdopts,
105
- criteria: bc.IConcreteBatchCriteria):
106
- self.logger.info("Imagizing .csvs in %s...",
107
- cmdopts['batch_output_root'])
108
- start = time.time()
109
- BatchExpParallelImagizer(main_config, cmdopts)(
110
- intra_HM_config, criteria)
111
- elapsed = int(time.time() - start)
112
- sec = datetime.timedelta(seconds=elapsed)
113
- self.logger.info("Imagizing complete: %s", str(sec))
114
-
115
-
116
- __api__ = [
117
- 'PipelineStage3'
118
- ]
68
+ self.logger.info("Processing with %s complete in %s", s, str(sec))
69
+
70
+
71
+ __all__ = ["PipelineStage3"]
@@ -1,2 +1,12 @@
1
- from . import inter_exp_graph_generator
2
- from . import intra_exp_graph_generator
1
+ #
2
+ # Copyright 2025 John Harwell, All rights reserved.
3
+ #
4
+ # SPDX-License-Identifier: MIT
5
+ #
6
+ """Container module for stage 4 of the pipeline."""
7
+
8
+ # Core packages
9
+
10
+ # 3rd party packages
11
+
12
+ # Project packages