sierra-research 1.3.6__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254)
  1. sierra/__init__.py +3 -3
  2. sierra/core/__init__.py +3 -3
  3. sierra/core/batchroot.py +223 -0
  4. sierra/core/cmdline.py +681 -1057
  5. sierra/core/compare.py +11 -0
  6. sierra/core/config.py +96 -88
  7. sierra/core/engine.py +306 -0
  8. sierra/core/execenv.py +380 -0
  9. sierra/core/expdef.py +11 -0
  10. sierra/core/experiment/__init__.py +1 -0
  11. sierra/core/experiment/bindings.py +150 -101
  12. sierra/core/experiment/definition.py +414 -245
  13. sierra/core/experiment/spec.py +83 -85
  14. sierra/core/exproot.py +44 -0
  15. sierra/core/generators/__init__.py +10 -0
  16. sierra/core/generators/experiment.py +528 -0
  17. sierra/core/generators/generator_factory.py +138 -137
  18. sierra/core/graphs/__init__.py +23 -0
  19. sierra/core/graphs/bcbridge.py +94 -0
  20. sierra/core/graphs/heatmap.py +245 -324
  21. sierra/core/graphs/pathset.py +27 -0
  22. sierra/core/graphs/schema.py +77 -0
  23. sierra/core/graphs/stacked_line.py +341 -0
  24. sierra/core/graphs/summary_line.py +506 -0
  25. sierra/core/logging.py +3 -2
  26. sierra/core/models/__init__.py +3 -1
  27. sierra/core/models/info.py +19 -0
  28. sierra/core/models/interface.py +52 -122
  29. sierra/core/pipeline/__init__.py +2 -5
  30. sierra/core/pipeline/pipeline.py +228 -126
  31. sierra/core/pipeline/stage1/__init__.py +10 -0
  32. sierra/core/pipeline/stage1/pipeline_stage1.py +45 -31
  33. sierra/core/pipeline/stage2/__init__.py +10 -0
  34. sierra/core/pipeline/stage2/pipeline_stage2.py +8 -11
  35. sierra/core/pipeline/stage2/runner.py +401 -0
  36. sierra/core/pipeline/stage3/__init__.py +12 -0
  37. sierra/core/pipeline/stage3/gather.py +321 -0
  38. sierra/core/pipeline/stage3/pipeline_stage3.py +37 -84
  39. sierra/core/pipeline/stage4/__init__.py +12 -2
  40. sierra/core/pipeline/stage4/pipeline_stage4.py +36 -354
  41. sierra/core/pipeline/stage5/__init__.py +12 -0
  42. sierra/core/pipeline/stage5/pipeline_stage5.py +33 -208
  43. sierra/core/pipeline/yaml.py +48 -0
  44. sierra/core/plugin.py +529 -62
  45. sierra/core/proc.py +11 -0
  46. sierra/core/prod.py +11 -0
  47. sierra/core/ros1/__init__.py +5 -1
  48. sierra/core/ros1/callbacks.py +22 -21
  49. sierra/core/ros1/cmdline.py +59 -88
  50. sierra/core/ros1/generators.py +159 -175
  51. sierra/core/ros1/variables/__init__.py +3 -0
  52. sierra/core/ros1/variables/exp_setup.py +122 -116
  53. sierra/core/startup.py +106 -76
  54. sierra/core/stat_kernels.py +4 -5
  55. sierra/core/storage.py +13 -32
  56. sierra/core/trampoline.py +30 -0
  57. sierra/core/types.py +116 -71
  58. sierra/core/utils.py +103 -106
  59. sierra/core/variables/__init__.py +1 -1
  60. sierra/core/variables/base_variable.py +12 -17
  61. sierra/core/variables/batch_criteria.py +387 -481
  62. sierra/core/variables/builtin.py +135 -0
  63. sierra/core/variables/exp_setup.py +19 -39
  64. sierra/core/variables/population_size.py +72 -76
  65. sierra/core/variables/variable_density.py +44 -68
  66. sierra/core/vector.py +1 -1
  67. sierra/main.py +256 -88
  68. sierra/plugins/__init__.py +119 -0
  69. sierra/plugins/compare/__init__.py +14 -0
  70. sierra/plugins/compare/graphs/__init__.py +19 -0
  71. sierra/plugins/compare/graphs/cmdline.py +120 -0
  72. sierra/plugins/compare/graphs/comparator.py +291 -0
  73. sierra/plugins/compare/graphs/inter_controller.py +531 -0
  74. sierra/plugins/compare/graphs/inter_scenario.py +297 -0
  75. sierra/plugins/compare/graphs/namecalc.py +53 -0
  76. sierra/plugins/compare/graphs/outputroot.py +73 -0
  77. sierra/plugins/compare/graphs/plugin.py +147 -0
  78. sierra/plugins/compare/graphs/preprocess.py +172 -0
  79. sierra/plugins/compare/graphs/schema.py +37 -0
  80. sierra/plugins/engine/__init__.py +14 -0
  81. sierra/plugins/engine/argos/__init__.py +18 -0
  82. sierra/plugins/{platform → engine}/argos/cmdline.py +144 -151
  83. sierra/plugins/{platform/argos/variables → engine/argos/generators}/__init__.py +5 -0
  84. sierra/plugins/engine/argos/generators/engine.py +394 -0
  85. sierra/plugins/engine/argos/plugin.py +393 -0
  86. sierra/plugins/{platform/argos/generators → engine/argos/variables}/__init__.py +5 -0
  87. sierra/plugins/engine/argos/variables/arena_shape.py +183 -0
  88. sierra/plugins/engine/argos/variables/cameras.py +240 -0
  89. sierra/plugins/engine/argos/variables/constant_density.py +112 -0
  90. sierra/plugins/engine/argos/variables/exp_setup.py +82 -0
  91. sierra/plugins/{platform → engine}/argos/variables/physics_engines.py +83 -87
  92. sierra/plugins/engine/argos/variables/population_constant_density.py +178 -0
  93. sierra/plugins/engine/argos/variables/population_size.py +115 -0
  94. sierra/plugins/engine/argos/variables/population_variable_density.py +123 -0
  95. sierra/plugins/engine/argos/variables/rendering.py +108 -0
  96. sierra/plugins/engine/ros1gazebo/__init__.py +18 -0
  97. sierra/plugins/engine/ros1gazebo/cmdline.py +175 -0
  98. sierra/plugins/{platform/ros1robot → engine/ros1gazebo}/generators/__init__.py +5 -0
  99. sierra/plugins/engine/ros1gazebo/generators/engine.py +125 -0
  100. sierra/plugins/engine/ros1gazebo/plugin.py +404 -0
  101. sierra/plugins/engine/ros1gazebo/variables/__init__.py +15 -0
  102. sierra/plugins/engine/ros1gazebo/variables/population_size.py +214 -0
  103. sierra/plugins/engine/ros1robot/__init__.py +18 -0
  104. sierra/plugins/engine/ros1robot/cmdline.py +159 -0
  105. sierra/plugins/{platform/ros1gazebo → engine/ros1robot}/generators/__init__.py +4 -0
  106. sierra/plugins/engine/ros1robot/generators/engine.py +95 -0
  107. sierra/plugins/engine/ros1robot/plugin.py +410 -0
  108. sierra/plugins/{hpc/local → engine/ros1robot/variables}/__init__.py +5 -0
  109. sierra/plugins/engine/ros1robot/variables/population_size.py +146 -0
  110. sierra/plugins/execenv/__init__.py +11 -0
  111. sierra/plugins/execenv/hpc/__init__.py +18 -0
  112. sierra/plugins/execenv/hpc/adhoc/__init__.py +18 -0
  113. sierra/plugins/execenv/hpc/adhoc/cmdline.py +30 -0
  114. sierra/plugins/execenv/hpc/adhoc/plugin.py +131 -0
  115. sierra/plugins/execenv/hpc/cmdline.py +137 -0
  116. sierra/plugins/execenv/hpc/local/__init__.py +18 -0
  117. sierra/plugins/execenv/hpc/local/cmdline.py +31 -0
  118. sierra/plugins/execenv/hpc/local/plugin.py +145 -0
  119. sierra/plugins/execenv/hpc/pbs/__init__.py +18 -0
  120. sierra/plugins/execenv/hpc/pbs/cmdline.py +30 -0
  121. sierra/plugins/execenv/hpc/pbs/plugin.py +121 -0
  122. sierra/plugins/execenv/hpc/slurm/__init__.py +18 -0
  123. sierra/plugins/execenv/hpc/slurm/cmdline.py +30 -0
  124. sierra/plugins/execenv/hpc/slurm/plugin.py +133 -0
  125. sierra/plugins/execenv/prefectserver/__init__.py +18 -0
  126. sierra/plugins/execenv/prefectserver/cmdline.py +66 -0
  127. sierra/plugins/execenv/prefectserver/dockerremote/__init__.py +18 -0
  128. sierra/plugins/execenv/prefectserver/dockerremote/cmdline.py +66 -0
  129. sierra/plugins/execenv/prefectserver/dockerremote/plugin.py +132 -0
  130. sierra/plugins/execenv/prefectserver/flow.py +66 -0
  131. sierra/plugins/execenv/prefectserver/local/__init__.py +18 -0
  132. sierra/plugins/execenv/prefectserver/local/cmdline.py +29 -0
  133. sierra/plugins/execenv/prefectserver/local/plugin.py +133 -0
  134. sierra/plugins/{hpc/adhoc → execenv/robot}/__init__.py +1 -0
  135. sierra/plugins/execenv/robot/turtlebot3/__init__.py +18 -0
  136. sierra/plugins/execenv/robot/turtlebot3/plugin.py +204 -0
  137. sierra/plugins/expdef/__init__.py +14 -0
  138. sierra/plugins/expdef/json/__init__.py +14 -0
  139. sierra/plugins/expdef/json/plugin.py +504 -0
  140. sierra/plugins/expdef/xml/__init__.py +14 -0
  141. sierra/plugins/expdef/xml/plugin.py +386 -0
  142. sierra/{core/hpc → plugins/proc}/__init__.py +1 -1
  143. sierra/plugins/proc/collate/__init__.py +15 -0
  144. sierra/plugins/proc/collate/cmdline.py +47 -0
  145. sierra/plugins/proc/collate/plugin.py +271 -0
  146. sierra/plugins/proc/compress/__init__.py +18 -0
  147. sierra/plugins/proc/compress/cmdline.py +47 -0
  148. sierra/plugins/proc/compress/plugin.py +123 -0
  149. sierra/plugins/proc/decompress/__init__.py +18 -0
  150. sierra/plugins/proc/decompress/plugin.py +96 -0
  151. sierra/plugins/proc/imagize/__init__.py +15 -0
  152. sierra/plugins/proc/imagize/cmdline.py +49 -0
  153. sierra/plugins/proc/imagize/plugin.py +270 -0
  154. sierra/plugins/proc/modelrunner/__init__.py +16 -0
  155. sierra/plugins/proc/modelrunner/plugin.py +250 -0
  156. sierra/plugins/proc/statistics/__init__.py +15 -0
  157. sierra/plugins/proc/statistics/cmdline.py +64 -0
  158. sierra/plugins/proc/statistics/plugin.py +390 -0
  159. sierra/plugins/{hpc → prod}/__init__.py +1 -0
  160. sierra/plugins/prod/graphs/__init__.py +18 -0
  161. sierra/plugins/prod/graphs/cmdline.py +269 -0
  162. sierra/plugins/prod/graphs/collate.py +279 -0
  163. sierra/plugins/prod/graphs/inter/__init__.py +13 -0
  164. sierra/plugins/prod/graphs/inter/generate.py +83 -0
  165. sierra/plugins/prod/graphs/inter/heatmap.py +86 -0
  166. sierra/plugins/prod/graphs/inter/line.py +134 -0
  167. sierra/plugins/prod/graphs/intra/__init__.py +15 -0
  168. sierra/plugins/prod/graphs/intra/generate.py +202 -0
  169. sierra/plugins/prod/graphs/intra/heatmap.py +74 -0
  170. sierra/plugins/prod/graphs/intra/line.py +114 -0
  171. sierra/plugins/prod/graphs/plugin.py +103 -0
  172. sierra/plugins/prod/graphs/targets.py +63 -0
  173. sierra/plugins/prod/render/__init__.py +18 -0
  174. sierra/plugins/prod/render/cmdline.py +72 -0
  175. sierra/plugins/prod/render/plugin.py +282 -0
  176. sierra/plugins/storage/__init__.py +5 -0
  177. sierra/plugins/storage/arrow/__init__.py +18 -0
  178. sierra/plugins/storage/arrow/plugin.py +38 -0
  179. sierra/plugins/storage/csv/__init__.py +9 -0
  180. sierra/plugins/storage/csv/plugin.py +12 -5
  181. sierra/version.py +3 -2
  182. sierra_research-1.5.0.dist-info/METADATA +238 -0
  183. sierra_research-1.5.0.dist-info/RECORD +186 -0
  184. {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info}/WHEEL +1 -2
  185. sierra/core/experiment/xml.py +0 -454
  186. sierra/core/generators/controller_generator_parser.py +0 -34
  187. sierra/core/generators/exp_creator.py +0 -351
  188. sierra/core/generators/exp_generators.py +0 -142
  189. sierra/core/graphs/scatterplot2D.py +0 -109
  190. sierra/core/graphs/stacked_line_graph.py +0 -249
  191. sierra/core/graphs/stacked_surface_graph.py +0 -220
  192. sierra/core/graphs/summary_line_graph.py +0 -369
  193. sierra/core/hpc/cmdline.py +0 -142
  194. sierra/core/models/graphs.py +0 -87
  195. sierra/core/pipeline/stage2/exp_runner.py +0 -286
  196. sierra/core/pipeline/stage3/imagizer.py +0 -149
  197. sierra/core/pipeline/stage3/run_collator.py +0 -317
  198. sierra/core/pipeline/stage3/statistics_calculator.py +0 -478
  199. sierra/core/pipeline/stage4/graph_collator.py +0 -319
  200. sierra/core/pipeline/stage4/inter_exp_graph_generator.py +0 -240
  201. sierra/core/pipeline/stage4/intra_exp_graph_generator.py +0 -317
  202. sierra/core/pipeline/stage4/model_runner.py +0 -168
  203. sierra/core/pipeline/stage4/rendering.py +0 -283
  204. sierra/core/pipeline/stage4/yaml_config_loader.py +0 -103
  205. sierra/core/pipeline/stage5/inter_scenario_comparator.py +0 -328
  206. sierra/core/pipeline/stage5/intra_scenario_comparator.py +0 -989
  207. sierra/core/platform.py +0 -493
  208. sierra/core/plugin_manager.py +0 -369
  209. sierra/core/root_dirpath_generator.py +0 -241
  210. sierra/plugins/hpc/adhoc/plugin.py +0 -125
  211. sierra/plugins/hpc/local/plugin.py +0 -81
  212. sierra/plugins/hpc/pbs/__init__.py +0 -9
  213. sierra/plugins/hpc/pbs/plugin.py +0 -126
  214. sierra/plugins/hpc/slurm/__init__.py +0 -9
  215. sierra/plugins/hpc/slurm/plugin.py +0 -130
  216. sierra/plugins/platform/__init__.py +0 -9
  217. sierra/plugins/platform/argos/__init__.py +0 -9
  218. sierra/plugins/platform/argos/generators/platform_generators.py +0 -383
  219. sierra/plugins/platform/argos/plugin.py +0 -337
  220. sierra/plugins/platform/argos/variables/arena_shape.py +0 -145
  221. sierra/plugins/platform/argos/variables/cameras.py +0 -243
  222. sierra/plugins/platform/argos/variables/constant_density.py +0 -136
  223. sierra/plugins/platform/argos/variables/exp_setup.py +0 -113
  224. sierra/plugins/platform/argos/variables/population_constant_density.py +0 -175
  225. sierra/plugins/platform/argos/variables/population_size.py +0 -102
  226. sierra/plugins/platform/argos/variables/population_variable_density.py +0 -132
  227. sierra/plugins/platform/argos/variables/rendering.py +0 -104
  228. sierra/plugins/platform/ros1gazebo/__init__.py +0 -9
  229. sierra/plugins/platform/ros1gazebo/cmdline.py +0 -213
  230. sierra/plugins/platform/ros1gazebo/generators/platform_generators.py +0 -137
  231. sierra/plugins/platform/ros1gazebo/plugin.py +0 -335
  232. sierra/plugins/platform/ros1gazebo/variables/__init__.py +0 -10
  233. sierra/plugins/platform/ros1gazebo/variables/population_size.py +0 -204
  234. sierra/plugins/platform/ros1robot/__init__.py +0 -9
  235. sierra/plugins/platform/ros1robot/cmdline.py +0 -175
  236. sierra/plugins/platform/ros1robot/generators/platform_generators.py +0 -112
  237. sierra/plugins/platform/ros1robot/plugin.py +0 -373
  238. sierra/plugins/platform/ros1robot/variables/__init__.py +0 -10
  239. sierra/plugins/platform/ros1robot/variables/population_size.py +0 -146
  240. sierra/plugins/robot/__init__.py +0 -9
  241. sierra/plugins/robot/turtlebot3/__init__.py +0 -9
  242. sierra/plugins/robot/turtlebot3/plugin.py +0 -194
  243. sierra_research-1.3.6.data/data/share/man/man1/sierra-cli.1 +0 -2349
  244. sierra_research-1.3.6.data/data/share/man/man7/sierra-examples.7 +0 -488
  245. sierra_research-1.3.6.data/data/share/man/man7/sierra-exec-envs.7 +0 -331
  246. sierra_research-1.3.6.data/data/share/man/man7/sierra-glossary.7 +0 -285
  247. sierra_research-1.3.6.data/data/share/man/man7/sierra-platforms.7 +0 -358
  248. sierra_research-1.3.6.data/data/share/man/man7/sierra-usage.7 +0 -725
  249. sierra_research-1.3.6.data/data/share/man/man7/sierra.7 +0 -78
  250. sierra_research-1.3.6.dist-info/METADATA +0 -500
  251. sierra_research-1.3.6.dist-info/RECORD +0 -133
  252. sierra_research-1.3.6.dist-info/top_level.txt +0 -1
  253. {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info}/entry_points.txt +0 -0
  254. {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info/licenses}/LICENSE +0 -0
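The renames above show the main reorganization in this release: ``platform`` plugins become ``engine`` plugins, the HPC plugins move under ``execenv``, and much of the stage 3/4/5 machinery moves out of ``sierra.core.pipeline`` into ``proc``/``prod``/``compare`` plugins. A minimal migration sketch for downstream code that imported plugin modules by path; only the module locations are taken from the file list, and the import style and any symbols inside those modules are assumptions, not something this diff shows:

# Hypothetical import-path migration sketch based on the renames listed above.

# SIERRA 1.3.6 layout (removed in 1.5.0):
#   from sierra.plugins.platform.argos import cmdline as argos_cmdline
#   from sierra.plugins.hpc.slurm import plugin as slurm_plugin

# SIERRA 1.5.0 layout:
from sierra.plugins.engine.argos import cmdline as argos_cmdline
from sierra.plugins.execenv.hpc.slurm import plugin as slurm_plugin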
sierra/plugins/proc/statistics/plugin.py
@@ -0,0 +1,390 @@
+ # Copyright 2019 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+
+ """
+ Classes for generating statistics within and across experiments in a batch.
+ """
+
+ # Core packages
+ import multiprocessing as mp
+ import typing as tp
+ import queue
+ import logging
+ import pathlib
+ import os
+
+ # 3rd party packages
+ import pandas as pd
+ import yaml
+
+ # Project packages
+ import sierra.core.variables.batch_criteria as bc
+ from sierra.core import types, utils, stat_kernels, storage, batchroot, config
+ from sierra.core.pipeline.stage3 import gather
+ import sierra.core.plugin as pm
+
+ _logger = logging.getLogger(__name__)
+
+
+ class DataGatherer(gather.BaseGatherer):
+     """Gather :term:`Raw Output Data` files from all runs.
+
+     The configured output directory for each run is searched recursively for
+     files to gather. To be eligible for gathering and later processing, files
+     must:
+
+     - Be non-empty
+
+     - Have a suffix which is supported by the selected ``--storage`` plugin.
+
+     - Match an intra/inter experiment graph in ``graphs.yaml``.
+     """
+
+     def __init__(
+         self,
+         main_config: types.YAMLDict,
+         gather_opts: types.SimpleDict,
+         processq: mp.Queue,
+     ) -> None:
+         super().__init__(main_config, gather_opts, processq)
+         self.logger = logging.getLogger(__name__)
+         config_path = pathlib.Path(gather_opts["project_config_root"]) / pathlib.Path(
+             config.kYAML.graphs
+         )
+         if utils.path_exists(config_path):
+             _logger.debug("Filtering gathered data by graph generation targets")
+             self.config = yaml.load(utils.utf8open(config_path), yaml.FullLoader)
+         else:
+             _logger.debug(
+                 "%s does not exist for project: not filtering gathered data",
+                 config.kYAML.graphs,
+             )
+
+     def calc_gather_items(
+         self, run_output_root: pathlib.Path, exp_name: str
+     ) -> tp.List[gather.GatherSpec]:
+         to_gather = []
+         proj_output_root = run_output_root / str(self.run_metrics_leaf)
+         plugin = pm.pipeline.get_plugin_module(self.gather_opts["storage"])
+
+         for item in proj_output_root.rglob("*"):
+             if (
+                 item.is_dir()
+                 or not any(s in plugin.suffixes() for s in item.suffixes)
+                 or item.stat().st_size == 0
+             ):
+                 continue
+
+             filter_by_intra = "intra-exp" in self.config
+             filter_by_inter = "inter-exp" in self.config
+
+             filtered_intra = any(
+                 g["src_stem"] in str(item.relative_to(proj_output_root))
+                 for category in self.config["intra-exp"]
+                 for g in self.config["intra-exp"][category]
+             )
+
+             filtered_inter = any(
+                 g["src_stem"] in str(item.relative_to(proj_output_root))
+                 for category in self.config["inter-exp"]
+                 for g in self.config["inter-exp"][category]
+             )
+
+             # If both are present, we gather from it if there is a positive
+             # match in either graph type category.
+             if (
+                 filter_by_intra
+                 and filter_by_inter
+                 and (filtered_intra or filtered_inter)
+             ):
+                 self.logger.trace(
+                     "Gathering %s: match in %s [intra/inter]",
+                     item.relative_to(proj_output_root),
+                     config.kYAML.graphs,
+                 )
+                 to_gather.append(
+                     gather.GatherSpec(
+                         exp_name=exp_name,
+                         item_stem_path=item.relative_to(proj_output_root),
+                         collate_col=None,
+                     )
+                 )
+                 continue
+
+             # If only intra-exp graphs are present, we gather from it if
+             # there is a positive match in that category.
+             if filter_by_intra and filtered_intra:
+                 self.logger.trace(
+                     "Gathering %s: match in %s [intra]",
+                     item.relative_to(proj_output_root),
+                     config.kYAML.graphs,
+                 )
+                 to_gather.append(
+                     gather.GatherSpec(
+                         exp_name=exp_name,
+                         item_stem_path=item.relative_to(proj_output_root),
+                         collate_col=None,
+                     )
+                 )
+                 continue
+
+             # If only inter-exp graphs are present, we gather from it if
+             # there is a positive match in that category.
+             if filter_by_inter and filtered_inter:
+                 self.logger.trace(
+                     "Gathering %s: match in %s [inter]",
+                     item.relative_to(proj_output_root),
+                     config.kYAML.graphs,
+                 )
+                 to_gather.append(
+                     gather.GatherSpec(
+                         exp_name=exp_name,
+                         item_stem_path=item.relative_to(proj_output_root),
+                         collate_col=None,
+                     )
+                 )
+                 continue
+
+         return to_gather
+
+
+ def proc_batch_exp(
+     main_config: types.YAMLDict,
+     cmdopts: types.Cmdopts,
+     pathset: batchroot.PathSet,
+     criteria: bc.XVarBatchCriteria,
+     gatherer_type=DataGatherer,
+ ) -> None:
+     """Process :term:`Raw Output Data` files for each :term:`Experiment`.
+
+     Ideally this is done in parallel across experiments, but it can be changed
+     to serial via ``--processing-parallelism`` if memory on the SIERRA host
+     machine is limited.
+
+     It *IS* faster to do all the gathering at once and THEN do all the
+     processing, but that doesn't work for extremely large amounts of data
+     generated per :term:`Experimental Run`.
+     """
+     exp_to_proc = utils.exp_range_calc(
+         cmdopts["exp_range"], pathset.output_root, criteria.gen_exp_names()
+     )
+
+     template_input_leaf = pathlib.Path(cmdopts["expdef_template"]).stem
+
+     stat_opts = {
+         "template_input_leaf": template_input_leaf,
+         "df_verify": cmdopts["df_verify"],
+         "dist_stats": cmdopts["dist_stats"],
+         "processing_mem_limit": cmdopts["processing_mem_limit"],
+         "storage": cmdopts["storage"],
+         "project_config_root": cmdopts["project_config_root"],
+         "df_homogenize": cmdopts["df_homogenize"],
+     }
+
+     pool_opts = {}
+     parallelism = cmdopts["processing_parallelism"]
+
+     # Always need to have at least one of each! If SIERRA is invoked on a
+     # machine with 2 or fewer logical cores, the calculation with
+     # psutil.cpu_count() will return 0 for the # of gatherers.
+     pool_opts["n_gatherers"] = max(1, int(parallelism * 0.25))
+     pool_opts["n_processors"] = max(1, int(parallelism * 0.75))
+
+     with mp.Pool(
+         processes=pool_opts["n_gatherers"] + pool_opts["n_processors"]
+     ) as pool:
+         _execute_for_batch(
+             main_config, pathset, exp_to_proc, stat_opts, pool_opts, gatherer_type, pool
+         )
+
+         pool.close()
+         pool.join()
+
+
+ def _execute_for_batch(
+     main_config: types.YAMLDict,
+     pathset: batchroot.PathSet,
+     exp_to_proc: tp.List[pathlib.Path],
+     stat_opts: types.SimpleDict,
+     pool_opts: types.SimpleDict,
+     gatherer_type,
+     pool,
+ ) -> None:
+     """
+     Perform statistics generation on the :term:`Batch Experiment`.
+
+     Gathers all :term:`Raw Output Data` files FIRST, and *then* does
+     processing. This is almost 50% faster than doing a true producer-consumer
+     queue, probably because there is much less traffic across processes and/or
+     better disk I/O performance.
+     """
+     m = mp.Manager()
+     gatherq = m.Queue()
+     processq = m.Queue()
+
+     for exp in exp_to_proc:
+         gatherq.put(exp)
+
+     _logger.debug(
+         "Starting %d gatherers, method=%s",
+         pool_opts["n_gatherers"],
+         mp.get_start_method(),
+     )
+
+     gathered = [
+         pool.apply_async(
+             _gather_worker,
+             (gatherer_type, gatherq, processq, main_config, stat_opts),
+         )
+         for i in range(0, pool_opts["n_gatherers"])
+     ]
+
+     _logger.debug(
+         "Starting %d processors, method=%s",
+         pool_opts["n_processors"],
+         mp.get_start_method(),
+     )
+
+     processed = [
+         pool.apply_async(_process_worker, (processq, main_config, pathset, stat_opts))
+         for i in range(0, pool_opts["n_processors"])
+     ]
+
+     _logger.debug("Waiting for workers to finish")
+
+     # To capture the otherwise silent crashes when something goes wrong in
+     # worker processes. Any assertions will show and any exceptions will be
+     # re-raised.
+     for g in gathered:
+         g.get()
+
+     for p in processed:
+         p.get()
+
+     _logger.debug("All workers finished")
+
+     assert (
+         gatherq.empty()
+     ), f"Finished processing but gather queue has {gatherq.qsize()} items?"
+
+     assert (
+         processq.empty()
+     ), f"Finished processing but process queue has {processq.qsize()} items?"
+
+
+ def _gather_worker(
+     gatherer_type,
+     gatherq: mp.Queue,
+     processq: mp.Queue,
+     main_config: types.YAMLDict,
+     stat_opts: tp.Dict[str, str],
+ ) -> None:
+     gatherer = gatherer_type(main_config, stat_opts, processq)
+
+     # Wait a few seconds after the queue is empty before bailing, at the
+     # start. If that is not long enough, then exponentially increase from
+     # there until you find how long it takes to get the first item in the
+     # queue, and use that as the appropriate timeout (plus a little margin).
+     timeout = 3
+     got_item = False
+     n_tries = 0
+     while n_tries < config.kGatherWorkerRetries:
+         try:
+             exp_output_root = gatherq.get(True, timeout)
+             gatherer(exp_output_root)
+             gatherq.task_done()
+             got_item = True
+
+         except queue.Empty:
+             if got_item:
+                 break
+
+             timeout *= 2
+             n_tries += 1
+
+     _logger.trace(f"Gather worker {os.getpid()} exit")
+
+
+ def _process_worker(
+     processq: mp.Queue,
+     main_config: types.YAMLDict,
+     pathset: batchroot.PathSet,
+     stat_opts: tp.Dict[str, str],
+ ) -> None:
+     # Wait a few seconds after the queue is empty before bailing, at the
+     # start. If that is not long enough, then exponentially increase from
+     # there until you find how long it takes to get the first item in the
+     # queue, and use that as the appropriate timeout (plus a little margin).
+     timeout = 3
+     got_item = False
+     n_tries = 0
+     while n_tries < config.kProcessWorkerRetries:
+         try:
+             spec = processq.get(True, timeout)
+
+             _proc_single_exp(main_config, stat_opts, pathset, spec)
+             processq.task_done()
+             got_item = True
+
+         except queue.Empty:
+             if got_item:
+                 break
+
+             timeout *= 2
+             n_tries += 1
+     _logger.trace(f"Process worker {os.getpid()} exit")
+
+
+ def _proc_single_exp(
+     main_config: types.YAMLDict,
+     stat_opts: types.StrDict,
+     pathset: batchroot.PathSet,
+     spec: gather.ProcessSpec,
+ ) -> None:
+     """Generate statistics from output files for all runs within an experiment.
+
+     .. IMPORTANT:: You *CANNOT* use logging ANYWHERE while processing .csv
+                    files. Why? I *think* because of a bug in the logging module
+                    itself. If you get unlucky enough to spawn the process which
+                    enters the __call__() method in this class while another
+                    logging statement is in progress (and is therefore holding an
+                    internal logging module lock), then the underlying fork()
+                    call will copy the lock in the acquired state. Then, when
+                    this class goes to try to log something, it deadlocks with
+                    itself.
+
+                    You also can't just create loggers with unique names, as this
+                    seems to be something like the GIL, but for the logging
+                    module. Sometimes python sucks.
+     """
+     csv_concat = pd.concat(spec.dfs)
+     exp_stat_root = pathset.stat_root / spec.gather.exp_name
+
+     utils.dir_create_checked(exp_stat_root, exist_ok=True)
+
+     by_row_index = csv_concat.groupby(csv_concat.index)
+
+     dfs = {}
+
+     if stat_opts["dist_stats"] in ["none", "all"]:
+         dfs.update(stat_kernels.mean.from_groupby(by_row_index))
+
+     if stat_opts["dist_stats"] in ["conf95", "all"]:
+         dfs.update(stat_kernels.conf95.from_groupby(by_row_index))
+
+     if stat_opts["dist_stats"] in ["bw", "all"]:
+         dfs.update(stat_kernels.bw.from_groupby(by_row_index))
+
+     for ext, df in dfs.items():
+         opath = exp_stat_root / spec.gather.item_stem_path
+         utils.dir_create_checked(opath.parent, exist_ok=True)
+         opath = opath.with_suffix(ext)
+
+         df = utils.df_fill(df, stat_opts["df_homogenize"])
+         storage.df_write(df, opath, "storage.csv", index=False)
+
+
+ __all__ = ["proc_batch_exp"]
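A note on the statistics plugin above: ``DataGatherer`` only gathers files whose path (relative to the run output root) contains the ``src_stem`` of some graph listed in the project's ``graphs.yaml``, and ``proc_batch_exp()`` splits ``--processing-parallelism`` roughly 25/75 between gatherer and processor workers (e.g., a parallelism of 8 yields ``max(1, int(8 * 0.25)) = 2`` gatherers and ``max(1, int(8 * 0.75)) = 6`` processors). Below is a minimal sketch of the configuration shape the filtering code expects once loaded; the category name and stem are invented for illustration, and only the ``intra-exp``/``inter-exp``/``src_stem`` keys come from the code above.

import pathlib

# Hypothetical result of yaml.load() on a project's graphs.yaml; the category
# "example_category" and the stem "block-transport" are illustrative only.
graphs_config = {
    "intra-exp": {
        "example_category": [{"src_stem": "block-transport"}],
    },
    "inter-exp": {
        "example_category": [{"src_stem": "block-transport"}],
    },
}

# A candidate output file, relative to the run output root.
item = pathlib.Path("block-transport.csv")

# Mirrors the membership test in DataGatherer.calc_gather_items().
matched = any(
    g["src_stem"] in str(item)
    for category in graphs_config["intra-exp"]
    for g in graphs_config["intra-exp"][category]
)
assert matched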
sierra/plugins/prod/__init__.py
@@ -1,6 +1,7 @@
  # Copyright 2021 John Harwell, All rights reserved.
  #
  # SPDX-License-Identifier: MIT
+ """Common functionality for ``--prod`` plugins."""

  # Core packages

sierra/plugins/prod/graphs/__init__.py
@@ -0,0 +1,18 @@
+ #
+ # Copyright 2024 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ #
+ """
+ Container module for graph generation in stage 4.
+ """
+
+ # Core packages
+
+ # 3rd party packages
+
+ # Project packages
+
+
+ def sierra_plugin_type() -> str:
+     return "pipeline"
sierra/plugins/prod/graphs/cmdline.py
@@ -0,0 +1,269 @@
+ #
+ # Copyright 2025 John Harwell, All rights reserved.
+ #
+ # SPDX-License-Identifier: MIT
+ #
+
+ # Core packages
+ import typing as tp
+ import argparse
+
+ # 3rd party packages
+
+ # Project packages
+ from sierra.core import types
+ from sierra.plugins import PluginCmdline
+
+
+ def build(
+     parents: tp.List[argparse.ArgumentParser], stages: tp.List[int]
+ ) -> PluginCmdline:
+     """
+     Get a cmdline parser supporting the ``prod.graphs`` product plugin.
+     """
+     cmdline = PluginCmdline(parents, stages)
+     _build_multistage(cmdline)
+     _build_stage4(cmdline)
+     return cmdline
+
+
+ def _build_multistage(cmdline: PluginCmdline) -> PluginCmdline:
+     # Plotting options
+     cmdline.multistage.add_argument(
+         "--plot-log-xscale",
+         help="""
+         Place the set of X values used to generate intra- and
+         inter-experiment graphs into the logarithmic space. Mainly useful
+         when the batch criteria involves large system sizes, so that the
+         plots are more readable.
+         """
+         + cmdline.graphs_applicable_doc(
+             [":py:func:`Summary Line <sierra.core.graphs.summary_line.generate>`"]
+         )
+         + cmdline.stage_usage_doc([4, 5]),
+         action="store_true",
+     )
+
+     cmdline.multistage.add_argument(
+         "--plot-enumerated-xscale",
+         help="""
+         Instead of using the values generated by a given batch criteria for
+         the X values, use an enumerated list [0, ..., len(X values) - 1].
+         Mainly useful when the batch criteria involves large system sizes,
+         so that the plots are more readable.
+         """
+         + cmdline.graphs_applicable_doc(
+             [":py:func:`Summary Line <sierra.core.graphs.summary_line.generate>`"]
+         )
+         + cmdline.stage_usage_doc([4, 5]),
+         action="store_true",
+     )
+
+     cmdline.multistage.add_argument(
+         "--plot-log-yscale",
+         help="""
+         Place the set of Y values used to generate intra- and
+         inter-experiment graphs into the logarithmic space. Mainly useful
+         when the batch criteria involves large system sizes, so that the
+         plots are more readable.
+         """
+         + cmdline.graphs_applicable_doc(
+             [
+                 ":py:func:`Summary Line <sierra.core.graphs.summary_line.generate>`",
+                 ":py:func:`Stacked Line <sierra.core.graphs.stacked_line.generate>`",
+             ]
+         )
+         + cmdline.stage_usage_doc([4, 5]),
+         action="store_true",
+     )
+
+     cmdline.multistage.add_argument(
+         "--plot-primary-axis",
+         type=int,
+         help="""
+         This option allows you to override the primary axis, which is
+         normally computed based on the batch criteria.
+
+         For example, in a bivariate batch criteria composed of
+
+         - :ref:`plugins/engine/argos/bc/population-size` on the X axis
+           (rows)
+
+         - Another batch criteria which does not affect system size
+           (columns)
+
+         Metrics will be calculated by `computing` across .csv rows and
+         `projecting` down the columns by default, since system size will
+         only vary within a row. Passing a value of 1 to this option will
+         override this calculation, which can be useful in bivariate batch
+         criteria in which you are interested in the effect of the OTHER
+         non-size criteria on various performance measures.
+
+         0=criteria of interest varies across `rows`.
+
+         1=criteria of interest varies across `columns`.
+
+         This option only affects generating graphs from bivariate batch
+         criteria.
+         """
+         + cmdline.graphs_applicable_doc(
+             [
+                 ":py:func:`Heatmap <sierra.core.graphs.heatmap.generate>`",
+                 ":py:func:`Stacked Line <sierra.core.graphs.stacked_line.generate>`",
+             ]
+         )
+         + cmdline.stage_usage_doc([4, 5]),
+         default=None,
+     )
+
+     cmdline.multistage.add_argument(
+         "--plot-large-text",
+         help="""
+         This option specifies that the title, X/Y axis labels, and tick labels
+         should be larger than the SIERRA default. This is useful when
+         generating graphs suitable for two-column paper format, where the
+         default text size for rendered graphs will be too small to see
+         easily. The SIERRA defaults are generally fine for the one
+         column/journal paper format.
+         """
+         + cmdline.stage_usage_doc([4, 5]),
+         action="store_true",
+     )
+
+     cmdline.multistage.add_argument(
+         "--plot-transpose-graphs",
+         help="""
+         Transpose the X, Y axes in generated graphs. Useful as a general
+         way to tweak graphs for best use of space within a paper.
+
+         .. versionchanged:: 1.2.20
+
+            Renamed from ``--transpose-graphs`` to make its relation to other
+            plotting options clearer.
+         """
+         + cmdline.graphs_applicable_doc(
+             [":py:func:`Heatmap <sierra.core.graphs.heatmap.generate>`"]
+         )
+         + cmdline.stage_usage_doc([4, 5]),
+         action="store_true",
+     )
+     return cmdline
+
+
+ def _build_stage4(cmdline: PluginCmdline) -> PluginCmdline:
+     cmdline.stage4.add_argument(
+         "--graphs-backend",
+         choices=["matplotlib", "bokeh"],
+         help="""
+         Specify the default backend to be used when generating plots. Can
+         be overridden on a per-graph basis.
+
+         - ``matplotlib`` - Use matplotlib to generate static PNG
+           images.
+
+         - ``bokeh`` - Use bokeh to generate stand-alone HTML files
+           containing interactive bokeh visualizations. Files are
+           suitable for inclusion in static webpages, viewing in a
+           browser, etc.
+
+         See :ref:`plugins/prod/graphs` for more information.
+         """,
+         default="matplotlib",
+     )
+     cmdline.stage4.add_argument(
+         "--exp-n-datapoints-factor",
+         type=float,
+         help="""
+         Specify an additional multiplicative factor for computing the # of
+         datapoints captured during an :term:`Experiment`, modifying the
+         duration * ticks_per_sec default.
+         """,
+         default=1.0,
+     )
+     cmdline.stage4.add_argument(
+         "--exp-graphs",
+         choices=["intra", "inter", "all", "none"],
+         help="""
+         Specify which types of graphs should be generated from experimental
+         results:
+
+         - ``intra`` - Generate intra-experiment graphs from the results
+           of a single experiment within a batch, for each experiment in
+           the batch (this can take a long time with large batch
+           experiments). If any intra-experiment models are defined and
+           enabled, those are run and the results placed on appropriate
+           graphs.
+
+         - ``inter`` - Generate inter-experiment graphs _across_ the
+           results of all experiments in a batch. These are very fast
+           to generate, regardless of batch experiment size. If any
+           inter-experiment models are defined and enabled, those are
+           run and the results placed on appropriate graphs.
+
+         - ``all`` - Generate all types of graphs.
+
+         - ``none`` - Skip graph generation.
+         """
+         + cmdline.stage_usage_doc([4]),
+         default="all",
+     )
+
+     cmdline.stage4.add_argument(
+         "--project-no-LN",
+         help="""
+         Specify that the intra-experiment and inter-experiment linegraphs
+         defined in project YAML configuration should not be generated.
+         Useful if you are working on something which results in the
+         generation of other types of graphs, and the generation of those
+         linegraphs is not currently needed and only slows down your
+         development cycle.
+
+         Model linegraphs are still generated, if applicable.
+         """,
+         action="store_true",
+     )
+
+     cmdline.stage4.add_argument(
+         "--project-no-HM",
+         help="""
+         Specify that the intra-experiment heatmaps defined in project YAML
+         configuration should not be generated. Useful if:
+
+         - You are working on something which results in the generation
+           of other types of graphs, and the generation of heatmaps only
+           slows down your development cycle.
+
+         - You are working on stage5 comparison graphs for bivariate
+           batch criteria, and re-generating many heatmaps during stage4
+           is taking too long.
+
+         Model heatmaps are still generated, if applicable.
+
+         .. versionadded:: 1.2.20
+         """,
+         action="store_true",
+     )
+
+     return cmdline
+
+
+ def to_cmdopts(args: argparse.Namespace) -> types.Cmdopts:
+     return {
+         # multistage
+         "plot_log_xscale": args.plot_log_xscale,
+         "plot_enumerated_xscale": args.plot_enumerated_xscale,
+         "plot_log_yscale": args.plot_log_yscale,
+         "plot_primary_axis": args.plot_primary_axis,
+         "plot_large_text": args.plot_large_text,
+         "plot_transpose_graphs": args.plot_transpose_graphs,
+         # stage 4
+         "graphs_backend": args.graphs_backend,
+         "exp_n_datapoints_factor": args.exp_n_datapoints_factor,
+         "exp_graphs": args.exp_graphs,
+         "project_no_LN": args.project_no_LN,
+         "project_no_HM": args.project_no_HM,
+     }
+
+
+ def sphinx_cmdline_multistage():
+     return build([], [3, 4, 5]).parser
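As a closing usage sketch for the cmdline module above: the parser returned by ``build()`` can be paired with ``to_cmdopts()`` to produce the flat option dictionary the rest of the pipeline consumes. This assumes ``PluginCmdline`` exposes the assembled parser via a ``parser`` attribute (as ``sphinx_cmdline_multistage()`` already relies on) and that the stage list does not filter out the argument groups used here.

from sierra.plugins.prod.graphs import cmdline as graphs_cmdline

# Build the prod.graphs parser with no parent parsers for stages 3-5,
# mirroring sphinx_cmdline_multistage() above.
parser = graphs_cmdline.build([], [3, 4, 5]).parser

# Parse a hypothetical command line, then flatten it into cmdopts.
args = parser.parse_args(["--graphs-backend", "bokeh", "--plot-large-text"])
cmdopts = graphs_cmdline.to_cmdopts(args)

assert cmdopts["graphs_backend"] == "bokeh"
assert cmdopts["plot_large_text"] is True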