sierra-research 1.3.6__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. sierra/__init__.py +3 -3
  2. sierra/core/__init__.py +3 -3
  3. sierra/core/batchroot.py +223 -0
  4. sierra/core/cmdline.py +681 -1057
  5. sierra/core/compare.py +11 -0
  6. sierra/core/config.py +96 -88
  7. sierra/core/engine.py +306 -0
  8. sierra/core/execenv.py +380 -0
  9. sierra/core/expdef.py +11 -0
  10. sierra/core/experiment/__init__.py +1 -0
  11. sierra/core/experiment/bindings.py +150 -101
  12. sierra/core/experiment/definition.py +414 -245
  13. sierra/core/experiment/spec.py +83 -85
  14. sierra/core/exproot.py +44 -0
  15. sierra/core/generators/__init__.py +10 -0
  16. sierra/core/generators/experiment.py +528 -0
  17. sierra/core/generators/generator_factory.py +138 -137
  18. sierra/core/graphs/__init__.py +23 -0
  19. sierra/core/graphs/bcbridge.py +94 -0
  20. sierra/core/graphs/heatmap.py +245 -324
  21. sierra/core/graphs/pathset.py +27 -0
  22. sierra/core/graphs/schema.py +77 -0
  23. sierra/core/graphs/stacked_line.py +341 -0
  24. sierra/core/graphs/summary_line.py +506 -0
  25. sierra/core/logging.py +3 -2
  26. sierra/core/models/__init__.py +3 -1
  27. sierra/core/models/info.py +19 -0
  28. sierra/core/models/interface.py +52 -122
  29. sierra/core/pipeline/__init__.py +2 -5
  30. sierra/core/pipeline/pipeline.py +228 -126
  31. sierra/core/pipeline/stage1/__init__.py +10 -0
  32. sierra/core/pipeline/stage1/pipeline_stage1.py +45 -31
  33. sierra/core/pipeline/stage2/__init__.py +10 -0
  34. sierra/core/pipeline/stage2/pipeline_stage2.py +8 -11
  35. sierra/core/pipeline/stage2/runner.py +401 -0
  36. sierra/core/pipeline/stage3/__init__.py +12 -0
  37. sierra/core/pipeline/stage3/gather.py +321 -0
  38. sierra/core/pipeline/stage3/pipeline_stage3.py +37 -84
  39. sierra/core/pipeline/stage4/__init__.py +12 -2
  40. sierra/core/pipeline/stage4/pipeline_stage4.py +36 -354
  41. sierra/core/pipeline/stage5/__init__.py +12 -0
  42. sierra/core/pipeline/stage5/pipeline_stage5.py +33 -208
  43. sierra/core/pipeline/yaml.py +48 -0
  44. sierra/core/plugin.py +529 -62
  45. sierra/core/proc.py +11 -0
  46. sierra/core/prod.py +11 -0
  47. sierra/core/ros1/__init__.py +5 -1
  48. sierra/core/ros1/callbacks.py +22 -21
  49. sierra/core/ros1/cmdline.py +59 -88
  50. sierra/core/ros1/generators.py +159 -175
  51. sierra/core/ros1/variables/__init__.py +3 -0
  52. sierra/core/ros1/variables/exp_setup.py +122 -116
  53. sierra/core/startup.py +106 -76
  54. sierra/core/stat_kernels.py +4 -5
  55. sierra/core/storage.py +13 -32
  56. sierra/core/trampoline.py +30 -0
  57. sierra/core/types.py +116 -71
  58. sierra/core/utils.py +103 -106
  59. sierra/core/variables/__init__.py +1 -1
  60. sierra/core/variables/base_variable.py +12 -17
  61. sierra/core/variables/batch_criteria.py +387 -481
  62. sierra/core/variables/builtin.py +135 -0
  63. sierra/core/variables/exp_setup.py +19 -39
  64. sierra/core/variables/population_size.py +72 -76
  65. sierra/core/variables/variable_density.py +44 -68
  66. sierra/core/vector.py +1 -1
  67. sierra/main.py +256 -88
  68. sierra/plugins/__init__.py +119 -0
  69. sierra/plugins/compare/__init__.py +14 -0
  70. sierra/plugins/compare/graphs/__init__.py +19 -0
  71. sierra/plugins/compare/graphs/cmdline.py +120 -0
  72. sierra/plugins/compare/graphs/comparator.py +291 -0
  73. sierra/plugins/compare/graphs/inter_controller.py +531 -0
  74. sierra/plugins/compare/graphs/inter_scenario.py +297 -0
  75. sierra/plugins/compare/graphs/namecalc.py +53 -0
  76. sierra/plugins/compare/graphs/outputroot.py +73 -0
  77. sierra/plugins/compare/graphs/plugin.py +147 -0
  78. sierra/plugins/compare/graphs/preprocess.py +172 -0
  79. sierra/plugins/compare/graphs/schema.py +37 -0
  80. sierra/plugins/engine/__init__.py +14 -0
  81. sierra/plugins/engine/argos/__init__.py +18 -0
  82. sierra/plugins/{platform → engine}/argos/cmdline.py +144 -151
  83. sierra/plugins/{platform/argos/variables → engine/argos/generators}/__init__.py +5 -0
  84. sierra/plugins/engine/argos/generators/engine.py +394 -0
  85. sierra/plugins/engine/argos/plugin.py +393 -0
  86. sierra/plugins/{platform/argos/generators → engine/argos/variables}/__init__.py +5 -0
  87. sierra/plugins/engine/argos/variables/arena_shape.py +183 -0
  88. sierra/plugins/engine/argos/variables/cameras.py +240 -0
  89. sierra/plugins/engine/argos/variables/constant_density.py +112 -0
  90. sierra/plugins/engine/argos/variables/exp_setup.py +82 -0
  91. sierra/plugins/{platform → engine}/argos/variables/physics_engines.py +83 -87
  92. sierra/plugins/engine/argos/variables/population_constant_density.py +178 -0
  93. sierra/plugins/engine/argos/variables/population_size.py +115 -0
  94. sierra/plugins/engine/argos/variables/population_variable_density.py +123 -0
  95. sierra/plugins/engine/argos/variables/rendering.py +108 -0
  96. sierra/plugins/engine/ros1gazebo/__init__.py +18 -0
  97. sierra/plugins/engine/ros1gazebo/cmdline.py +175 -0
  98. sierra/plugins/{platform/ros1robot → engine/ros1gazebo}/generators/__init__.py +5 -0
  99. sierra/plugins/engine/ros1gazebo/generators/engine.py +125 -0
  100. sierra/plugins/engine/ros1gazebo/plugin.py +404 -0
  101. sierra/plugins/engine/ros1gazebo/variables/__init__.py +15 -0
  102. sierra/plugins/engine/ros1gazebo/variables/population_size.py +214 -0
  103. sierra/plugins/engine/ros1robot/__init__.py +18 -0
  104. sierra/plugins/engine/ros1robot/cmdline.py +159 -0
  105. sierra/plugins/{platform/ros1gazebo → engine/ros1robot}/generators/__init__.py +4 -0
  106. sierra/plugins/engine/ros1robot/generators/engine.py +95 -0
  107. sierra/plugins/engine/ros1robot/plugin.py +410 -0
  108. sierra/plugins/{hpc/local → engine/ros1robot/variables}/__init__.py +5 -0
  109. sierra/plugins/engine/ros1robot/variables/population_size.py +146 -0
  110. sierra/plugins/execenv/__init__.py +11 -0
  111. sierra/plugins/execenv/hpc/__init__.py +18 -0
  112. sierra/plugins/execenv/hpc/adhoc/__init__.py +18 -0
  113. sierra/plugins/execenv/hpc/adhoc/cmdline.py +30 -0
  114. sierra/plugins/execenv/hpc/adhoc/plugin.py +131 -0
  115. sierra/plugins/execenv/hpc/cmdline.py +137 -0
  116. sierra/plugins/execenv/hpc/local/__init__.py +18 -0
  117. sierra/plugins/execenv/hpc/local/cmdline.py +31 -0
  118. sierra/plugins/execenv/hpc/local/plugin.py +145 -0
  119. sierra/plugins/execenv/hpc/pbs/__init__.py +18 -0
  120. sierra/plugins/execenv/hpc/pbs/cmdline.py +30 -0
  121. sierra/plugins/execenv/hpc/pbs/plugin.py +121 -0
  122. sierra/plugins/execenv/hpc/slurm/__init__.py +18 -0
  123. sierra/plugins/execenv/hpc/slurm/cmdline.py +30 -0
  124. sierra/plugins/execenv/hpc/slurm/plugin.py +133 -0
  125. sierra/plugins/execenv/prefectserver/__init__.py +18 -0
  126. sierra/plugins/execenv/prefectserver/cmdline.py +66 -0
  127. sierra/plugins/execenv/prefectserver/dockerremote/__init__.py +18 -0
  128. sierra/plugins/execenv/prefectserver/dockerremote/cmdline.py +66 -0
  129. sierra/plugins/execenv/prefectserver/dockerremote/plugin.py +132 -0
  130. sierra/plugins/execenv/prefectserver/flow.py +66 -0
  131. sierra/plugins/execenv/prefectserver/local/__init__.py +18 -0
  132. sierra/plugins/execenv/prefectserver/local/cmdline.py +29 -0
  133. sierra/plugins/execenv/prefectserver/local/plugin.py +133 -0
  134. sierra/plugins/{hpc/adhoc → execenv/robot}/__init__.py +1 -0
  135. sierra/plugins/execenv/robot/turtlebot3/__init__.py +18 -0
  136. sierra/plugins/execenv/robot/turtlebot3/plugin.py +204 -0
  137. sierra/plugins/expdef/__init__.py +14 -0
  138. sierra/plugins/expdef/json/__init__.py +14 -0
  139. sierra/plugins/expdef/json/plugin.py +504 -0
  140. sierra/plugins/expdef/xml/__init__.py +14 -0
  141. sierra/plugins/expdef/xml/plugin.py +386 -0
  142. sierra/{core/hpc → plugins/proc}/__init__.py +1 -1
  143. sierra/plugins/proc/collate/__init__.py +15 -0
  144. sierra/plugins/proc/collate/cmdline.py +47 -0
  145. sierra/plugins/proc/collate/plugin.py +271 -0
  146. sierra/plugins/proc/compress/__init__.py +18 -0
  147. sierra/plugins/proc/compress/cmdline.py +47 -0
  148. sierra/plugins/proc/compress/plugin.py +123 -0
  149. sierra/plugins/proc/decompress/__init__.py +18 -0
  150. sierra/plugins/proc/decompress/plugin.py +96 -0
  151. sierra/plugins/proc/imagize/__init__.py +15 -0
  152. sierra/plugins/proc/imagize/cmdline.py +49 -0
  153. sierra/plugins/proc/imagize/plugin.py +270 -0
  154. sierra/plugins/proc/modelrunner/__init__.py +16 -0
  155. sierra/plugins/proc/modelrunner/plugin.py +250 -0
  156. sierra/plugins/proc/statistics/__init__.py +15 -0
  157. sierra/plugins/proc/statistics/cmdline.py +64 -0
  158. sierra/plugins/proc/statistics/plugin.py +390 -0
  159. sierra/plugins/{hpc → prod}/__init__.py +1 -0
  160. sierra/plugins/prod/graphs/__init__.py +18 -0
  161. sierra/plugins/prod/graphs/cmdline.py +269 -0
  162. sierra/plugins/prod/graphs/collate.py +279 -0
  163. sierra/plugins/prod/graphs/inter/__init__.py +13 -0
  164. sierra/plugins/prod/graphs/inter/generate.py +83 -0
  165. sierra/plugins/prod/graphs/inter/heatmap.py +86 -0
  166. sierra/plugins/prod/graphs/inter/line.py +134 -0
  167. sierra/plugins/prod/graphs/intra/__init__.py +15 -0
  168. sierra/plugins/prod/graphs/intra/generate.py +202 -0
  169. sierra/plugins/prod/graphs/intra/heatmap.py +74 -0
  170. sierra/plugins/prod/graphs/intra/line.py +114 -0
  171. sierra/plugins/prod/graphs/plugin.py +103 -0
  172. sierra/plugins/prod/graphs/targets.py +63 -0
  173. sierra/plugins/prod/render/__init__.py +18 -0
  174. sierra/plugins/prod/render/cmdline.py +72 -0
  175. sierra/plugins/prod/render/plugin.py +282 -0
  176. sierra/plugins/storage/__init__.py +5 -0
  177. sierra/plugins/storage/arrow/__init__.py +18 -0
  178. sierra/plugins/storage/arrow/plugin.py +38 -0
  179. sierra/plugins/storage/csv/__init__.py +9 -0
  180. sierra/plugins/storage/csv/plugin.py +12 -5
  181. sierra/version.py +3 -2
  182. sierra_research-1.5.0.dist-info/METADATA +238 -0
  183. sierra_research-1.5.0.dist-info/RECORD +186 -0
  184. {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info}/WHEEL +1 -2
  185. sierra/core/experiment/xml.py +0 -454
  186. sierra/core/generators/controller_generator_parser.py +0 -34
  187. sierra/core/generators/exp_creator.py +0 -351
  188. sierra/core/generators/exp_generators.py +0 -142
  189. sierra/core/graphs/scatterplot2D.py +0 -109
  190. sierra/core/graphs/stacked_line_graph.py +0 -249
  191. sierra/core/graphs/stacked_surface_graph.py +0 -220
  192. sierra/core/graphs/summary_line_graph.py +0 -369
  193. sierra/core/hpc/cmdline.py +0 -142
  194. sierra/core/models/graphs.py +0 -87
  195. sierra/core/pipeline/stage2/exp_runner.py +0 -286
  196. sierra/core/pipeline/stage3/imagizer.py +0 -149
  197. sierra/core/pipeline/stage3/run_collator.py +0 -317
  198. sierra/core/pipeline/stage3/statistics_calculator.py +0 -478
  199. sierra/core/pipeline/stage4/graph_collator.py +0 -319
  200. sierra/core/pipeline/stage4/inter_exp_graph_generator.py +0 -240
  201. sierra/core/pipeline/stage4/intra_exp_graph_generator.py +0 -317
  202. sierra/core/pipeline/stage4/model_runner.py +0 -168
  203. sierra/core/pipeline/stage4/rendering.py +0 -283
  204. sierra/core/pipeline/stage4/yaml_config_loader.py +0 -103
  205. sierra/core/pipeline/stage5/inter_scenario_comparator.py +0 -328
  206. sierra/core/pipeline/stage5/intra_scenario_comparator.py +0 -989
  207. sierra/core/platform.py +0 -493
  208. sierra/core/plugin_manager.py +0 -369
  209. sierra/core/root_dirpath_generator.py +0 -241
  210. sierra/plugins/hpc/adhoc/plugin.py +0 -125
  211. sierra/plugins/hpc/local/plugin.py +0 -81
  212. sierra/plugins/hpc/pbs/__init__.py +0 -9
  213. sierra/plugins/hpc/pbs/plugin.py +0 -126
  214. sierra/plugins/hpc/slurm/__init__.py +0 -9
  215. sierra/plugins/hpc/slurm/plugin.py +0 -130
  216. sierra/plugins/platform/__init__.py +0 -9
  217. sierra/plugins/platform/argos/__init__.py +0 -9
  218. sierra/plugins/platform/argos/generators/platform_generators.py +0 -383
  219. sierra/plugins/platform/argos/plugin.py +0 -337
  220. sierra/plugins/platform/argos/variables/arena_shape.py +0 -145
  221. sierra/plugins/platform/argos/variables/cameras.py +0 -243
  222. sierra/plugins/platform/argos/variables/constant_density.py +0 -136
  223. sierra/plugins/platform/argos/variables/exp_setup.py +0 -113
  224. sierra/plugins/platform/argos/variables/population_constant_density.py +0 -175
  225. sierra/plugins/platform/argos/variables/population_size.py +0 -102
  226. sierra/plugins/platform/argos/variables/population_variable_density.py +0 -132
  227. sierra/plugins/platform/argos/variables/rendering.py +0 -104
  228. sierra/plugins/platform/ros1gazebo/__init__.py +0 -9
  229. sierra/plugins/platform/ros1gazebo/cmdline.py +0 -213
  230. sierra/plugins/platform/ros1gazebo/generators/platform_generators.py +0 -137
  231. sierra/plugins/platform/ros1gazebo/plugin.py +0 -335
  232. sierra/plugins/platform/ros1gazebo/variables/__init__.py +0 -10
  233. sierra/plugins/platform/ros1gazebo/variables/population_size.py +0 -204
  234. sierra/plugins/platform/ros1robot/__init__.py +0 -9
  235. sierra/plugins/platform/ros1robot/cmdline.py +0 -175
  236. sierra/plugins/platform/ros1robot/generators/platform_generators.py +0 -112
  237. sierra/plugins/platform/ros1robot/plugin.py +0 -373
  238. sierra/plugins/platform/ros1robot/variables/__init__.py +0 -10
  239. sierra/plugins/platform/ros1robot/variables/population_size.py +0 -146
  240. sierra/plugins/robot/__init__.py +0 -9
  241. sierra/plugins/robot/turtlebot3/__init__.py +0 -9
  242. sierra/plugins/robot/turtlebot3/plugin.py +0 -194
  243. sierra_research-1.3.6.data/data/share/man/man1/sierra-cli.1 +0 -2349
  244. sierra_research-1.3.6.data/data/share/man/man7/sierra-examples.7 +0 -488
  245. sierra_research-1.3.6.data/data/share/man/man7/sierra-exec-envs.7 +0 -331
  246. sierra_research-1.3.6.data/data/share/man/man7/sierra-glossary.7 +0 -285
  247. sierra_research-1.3.6.data/data/share/man/man7/sierra-platforms.7 +0 -358
  248. sierra_research-1.3.6.data/data/share/man/man7/sierra-usage.7 +0 -725
  249. sierra_research-1.3.6.data/data/share/man/man7/sierra.7 +0 -78
  250. sierra_research-1.3.6.dist-info/METADATA +0 -500
  251. sierra_research-1.3.6.dist-info/RECORD +0 -133
  252. sierra_research-1.3.6.dist-info/top_level.txt +0 -1
  253. {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info}/entry_points.txt +0 -0
  254. {sierra_research-1.3.6.dist-info → sierra_research-1.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,478 +0,0 @@
1
- # Copyright 2019 John Harwell, All rights reserved.
2
- #
3
- # SPDX-License-Identifier: MIT
4
-
5
- """
6
- Classes for generating statistics within and across experiments in a batch.
7
- """
8
-
9
- # Core packages
10
- import re
11
- import multiprocessing as mp
12
- import typing as tp
13
- import queue
14
- import time
15
- import datetime
16
- import logging
17
- import pathlib
18
-
19
- # 3rd party packages
20
- import pandas as pd
21
- import psutil
22
-
23
- # Project packages
24
- import sierra.core.variables.batch_criteria as bc
25
- from sierra.core import types, utils, stat_kernels, storage, config
26
-
27
-
28
class GatherSpec:
    """
    Describe a single .csv file to gather from an :term:`Experiment`.

    Attributes:
        exp_name: Name of the experiment the file belongs to.

        item_stem: Stem of the entry found directly inside a run's metrics
                   directory (either a .csv file or a sub-directory).

        imagize_csv_stem: Stem of a .csv file living inside the ``item_stem``
                          sub-directory, or ``None`` when ``item_stem`` is
                          itself a plain .csv file.
    """

    def __init__(self,
                 exp_name: str,
                 item_stem: str,
                 imagize_csv_stem: tp.Optional[str]):
        self.exp_name, self.item_stem = exp_name, item_stem
        self.imagize_csv_stem = imagize_csv_stem

    def for_imagizing(self):
        """Return ``True`` iff this spec names a file inside a sub-directory."""
        if self.imagize_csv_stem is None:
            return False

        return True
43
-
44
-
45
class BatchExpParallelCalculator:
    """Process :term:`Output .csv` files for each experiment in the batch.

    In parallel for speed. Two kinds of worker processes share one pool:

    - *Gatherers* take experiment output roots off a queue and run
      :class:`ExpCSVGatherer` on each, which enqueues gathered dataframes.

    - *Processors* take gathered dataframes off a second queue and run
      :class:`ExpStatisticsCalculator` on each.
    """

    def __init__(self, main_config: dict, cmdopts: types.Cmdopts):
        self.main_config = main_config
        self.cmdopts = cmdopts
        self.logger = logging.getLogger(__name__)

    def __call__(self, criteria: bc.IConcreteBatchCriteria) -> None:
        """Gather and process the outputs of every selected experiment.

        Args:
            criteria: The batch criteria, forwarded to
                      ``utils.exp_range_calc()`` to select the experiments.
        """
        exp_to_avg = utils.exp_range_calc(self.cmdopts,
                                          self.cmdopts['batch_output_root'],
                                          criteria)

        template_input_leaf = pathlib.Path(
            self.cmdopts['template_input_file']).stem

        # The subset of cmdopts the workers need; passed to each worker so
        # the whole cmdopts dict does not have to be shipped to them.
        avg_opts = {
            'template_input_leaf': template_input_leaf,
            'df_skip_verify': self.cmdopts['df_skip_verify'],
            'dist_stats': self.cmdopts['dist_stats'],
            'project_imagizing': self.cmdopts['project_imagizing'],
            'processing_mem_limit': self.cmdopts['processing_mem_limit'],
            'storage_medium': self.cmdopts['storage_medium'],
            'df_homogenize': self.cmdopts['df_homogenize']
        }

        if self.cmdopts['processing_serial']:
            n_gatherers = 1
            n_processors = 1
        else:
            # Always need to have at least one of each! If SIERRA is invoked
            # on a machine with 2 or less logical cores, the calculation
            # with psutil.cpu_count() will return 0 for # gatherers.
            # cpu_count() may also return None if the count is
            # undeterminable, so fall back to 1.
            n_cpus = psutil.cpu_count() or 1
            n_gatherers = max(1, int(n_cpus * 0.25))
            n_processors = max(1, int(n_cpus * 0.75))

        with mp.Pool(processes=n_gatherers + n_processors) as pool:
            self._execute(exp_to_avg, avg_opts, n_gatherers, n_processors, pool)

    def _execute(self,
                 exp_to_avg: tp.List[pathlib.Path],
                 avg_opts: types.SimpleDict,
                 n_gatherers: int,
                 n_processors: int,
                 pool) -> None:
        """Run all gatherers and processors to completion.

        Args:
            exp_to_avg: Output roots of the experiments to process.

            avg_opts: Options forwarded to every worker.

            n_gatherers: Number of gatherer tasks to submit.

            n_processors: Number of processor tasks to submit.

            pool: Pool to submit to. It must have at least
                  ``n_gatherers + n_processors`` processes so that all
                  workers run concurrently.

        Raises:
            Whatever a worker raised; exceptions are re-raised here via
            ``AsyncResult.get()``.
        """
        # The manager owns a server process; use it as a context manager so
        # that process is shut down when we are done (or on error).
        with mp.Manager() as m:
            gatherq = m.Queue()
            processq = m.Queue()

            for exp in exp_to_avg:
                gatherq.put(exp)

            # Start some workers gathering .csvs first to get things rolling.
            self.logger.debug("Starting %d gatherers, method=%s",
                              n_gatherers,
                              mp.get_start_method())
            gathered = [
                pool.apply_async(BatchExpParallelCalculator._gather_worker,
                                 (gatherq,
                                  processq,
                                  self.main_config,
                                  avg_opts))
                for _ in range(n_gatherers)
            ]

            self.logger.debug("Starting %d processors, method=%s",
                              n_processors,
                              mp.get_start_method())
            processed = [
                pool.apply_async(BatchExpParallelCalculator._process_worker,
                                 (processq,
                                  self.main_config,
                                  self.cmdopts['batch_stat_root'],
                                  avg_opts))
                for _ in range(n_processors)
            ]

            # To capture the otherwise silent crashes when something goes
            # wrong in worker processes. Any assertions will show and any
            # exceptions will be re-raised.
            self.logger.debug("Waiting for workers to finish")

            for g in gathered:
                g.get()

            # Every gatherer is done, so nothing more will ever be enqueued
            # for processing. Tell each processor to stop with one sentinel
            # apiece, rather than having them guess via timeouts (which
            # drops data if gathering is slower than the timeout).
            for _ in range(n_processors):
                processq.put(None)

            for p in processed:
                p.get()

            pool.close()
            pool.join()

        self.logger.debug("All threads finished")

    @staticmethod
    def _gather_worker(gatherq: mp.Queue,
                       processq: mp.Queue,
                       main_config: types.YAMLDict,
                       avg_opts: tp.Dict[str, str]) -> None:
        """Gather experiments from ``gatherq`` until it runs dry.

        Args:
            gatherq: Queue of experiment output roots to gather from.

            processq: Queue the gathered dataframes are placed on.

            main_config: Main YAML configuration.

            avg_opts: Options forwarded to :class:`ExpCSVGatherer`.
        """
        gatherer = ExpCSVGatherer(main_config, avg_opts, processq)

        # Wait for 3 seconds after the queue is empty before bailing, at the
        # start. If that is not long enough, double the timeout and try once
        # more. Once at least one item has been obtained, the first empty
        # read means the queue has been drained.
        timeout = 3
        got_item = False
        n_tries = 0
        while n_tries < 2:
            try:
                exp_output_root = gatherq.get(True, timeout)
            except queue.Empty:
                if got_item:
                    break

                timeout *= 2
                n_tries += 1
                continue

            gatherer(exp_output_root)
            gatherq.task_done()
            got_item = True

    @staticmethod
    def _process_worker(processq: mp.Queue,
                        main_config: types.YAMLDict,
                        batch_stat_root: pathlib.Path,
                        avg_opts: tp.Dict[str, str]) -> None:
        """Process gathered dataframes from ``processq`` until told to stop.

        Blocks until an item is available. A ``None`` item is the stop
        sentinel; whoever feeds ``processq`` must enqueue one per processor
        once no more work will arrive.

        Args:
            processq: Queue of single-entry dicts mapping a
                      :class:`GatherSpec` to the dataframes gathered for it.

            main_config: Main YAML configuration.

            batch_stat_root: Root directory statistics are written under.

            avg_opts: Options forwarded to :class:`ExpStatisticsCalculator`.
        """
        calculator = ExpStatisticsCalculator(main_config,
                                             avg_opts,
                                             batch_stat_root)

        while True:
            item = processq.get()

            if item is None:
                processq.task_done()
                break

            # Each item holds exactly one (spec, dataframes) entry.
            key, dfs = next(iter(item.items()))

            calculator(key, dfs)
            processq.task_done()
196
-
197
-
198
class ExpCSVGatherer:
    """Gather all :term:`Output .csv` files from all runs within an experiment.

    "Gathering" in this context means creating a dictionary mapping which .csv
    came from where, so that statistics can be generated both across and
    within experiments in the batch.
    """

    def __init__(self,
                 main_config: types.YAMLDict,
                 gather_opts: dict,
                 processq: mp.Queue) -> None:
        self.processq = processq
        self.gather_opts = gather_opts

        # Leaf name of the template input file, exactly as supplied in
        # gather_opts (no directory component).
        self.template_input_fname = self.gather_opts['template_input_leaf']

        self.main_config = main_config

        self.run_metrics_leaf = main_config['sierra']['run']['run_metrics_leaf']
        self.videos_leaf = 'videos'
        self.project_imagize = gather_opts['project_imagizing']

        self.logger = logging.getLogger(__name__)

    def __call__(self, exp_output_root: pathlib.Path) -> None:
        """Gather the CSV files of every run and enqueue them for processing.

        Args:
            exp_output_root: Directory whose entries are the per-run output
                             directories of one experiment.

        Raises:
            AssertionError: If the directory is empty, contains anything
                            which is not named like an experimental run, or
                            (unless verification is skipped) the run outputs
                            are inconsistent.
        """
        if not self.gather_opts['df_skip_verify']:
            self._verify_exp_outputs(exp_output_root)

        self.logger.info('Processing .csvs: %s...', exp_output_root.name)

        pattern = "{}_{}_output".format(
            re.escape(self.gather_opts['template_input_leaf']),
            r'\d+')

        runs = list(exp_output_root.iterdir())

        # Fail with a clear message instead of an IndexError on runs[0]
        # below.
        assert runs, f"No experimental runs found in '{exp_output_root}'"
        assert all(re.match(pattern, r.name) for r in runs), \
            f"Extra files/not all dirs in '{exp_output_root}' are exp runs"

        # The first run is used to discover what there is to gather; all
        # runs are expected to have produced the same set of outputs.
        to_gather = self._calc_gather_items(runs[0], exp_output_root.name)

        for item in to_gather:
            self._wait_for_memory()
            gathered = self._gather_item_from_sims(exp_output_root, item, runs)

            # Put gathered .csv list in the process queue
            self.processq.put(gathered)

        self.logger.debug("Enqueued %s items from %s for processing",
                          len(to_gather),
                          exp_output_root.name)

    def _calc_gather_items(self,
                           run_output_root: pathlib.Path,
                           exp_name: str) -> tp.List[GatherSpec]:
        """Enumerate everything to gather, using one run as the template.

        Args:
            run_output_root: Output directory of a single run.

            exp_name: Name of the experiment; recorded in each spec.

        Returns:
            One :class:`GatherSpec` per .csv file directly in the run's
            metrics directory, plus (only when imagizing is enabled) one per
            file inside each of its sub-directories.
        """
        to_gather = []

        sim_output_root = run_output_root / self.run_metrics_leaf

        # The metrics folder should contain nothing but .csv files and
        # directories. For all directories it contains, they each should
        # contain nothing but .csv files (these are for video rendering
        # later).
        for item in sim_output_root.iterdir():
            csv_stem = item.stem

            if item.is_file():
                to_gather.append(GatherSpec(exp_name=exp_name,
                                            item_stem=csv_stem,
                                            imagize_csv_stem=None))
                continue

            # This takes FOREVER, so only do it if we absolutely need to
            if not self.project_imagize:
                continue

            to_gather.extend(GatherSpec(exp_name=exp_name,
                                        item_stem=csv_stem,
                                        imagize_csv_stem=csv_fname.stem)
                             for csv_fname in item.iterdir())

        return to_gather

    def _gather_item_from_sims(self,
                               exp_output_root: pathlib.Path,
                               item: GatherSpec,
                               runs: tp.List[pathlib.Path]) -> tp.Dict[GatherSpec,
                                                                       tp.List[pd.DataFrame]]:
        """Read the file named by ``item`` from every run.

        Args:
            exp_output_root: Output root of the experiment (unused here).

            item: What to read from each run.

            runs: The per-run output directories.

        Returns:
            A single-entry dict mapping ``item`` to the list of dataframes
            read, one per run and in the order of ``runs``.
        """
        if item.for_imagizing():
            rel_path = pathlib.Path(item.item_stem) / \
                (item.imagize_csv_stem + config.kStorageExt['csv'])
        else:
            rel_path = pathlib.Path(item.item_stem + config.kStorageExt['csv'])

        reader = storage.DataFrameReader(self.gather_opts['storage_medium'])
        dfs = []  # type: tp.List[pd.DataFrame]

        for run in runs:
            df = reader(run / self.run_metrics_leaf / rel_path,
                        index_col=False)

            # If the first column was read as generic objects, coerce it to
            # floats. Use .iloc for the positional lookup: plain [] with an
            # integer on a label-indexed Series is deprecated in pandas.
            if df.dtypes.iloc[0] == 'object':
                first = df.columns[0]
                df[first] = df[first].apply(float)

            dfs.append(df)

        return {item: dfs}

    def _wait_for_memory(self) -> None:
        """Block until enough system memory is free to gather more data.

        Polls once per second until the percentage of available memory is at
        least ``100 - processing_mem_limit``.
        """
        while True:
            mem = psutil.virtual_memory()
            avail = mem.available / mem.total
            free_percent = avail * 100
            free_limit = 100 - self.gather_opts['processing_mem_limit']

            if free_percent >= free_limit:
                return

            self.logger.info("Waiting for memory: avail=%s,min=%s",
                             free_percent,
                             free_limit)
            time.sleep(1)

    def _verify_exp_outputs(self, exp_output_root: pathlib.Path) -> None:
        """
        Verify the integrity of all runs in an experiment.

        Specifically:

        - All runs produced all CSV files.

        - All runs CSV files with the same name have the same # rows and
          columns.

        Whether the CSV files contain NaNs is NOT checked.

        Every run is compared against a single reference run. Since "has the
        same files, columns, and # rows" is transitive, that is equivalent
        to comparing all pairs of runs, at a fraction of the cost.
        """
        # iterdir() hands back a single-use iterator, so collect the metric
        # directories up front. Entries without one are skipped.
        metric_roots = [run / self.run_metrics_leaf
                        for run in sorted(exp_output_root.iterdir())]
        metric_roots = [root for root in metric_roots if root.is_dir()]

        self.logger.info('Verifying results in %s...', str(exp_output_root))

        start = time.time()

        if metric_roots:
            reference = metric_roots[0]

            for other in metric_roots[1:]:
                self._verify_exp_outputs_pairwise(reference, other)

        elapsed = int(time.time() - start)
        sec = datetime.timedelta(seconds=elapsed)
        self.logger.info("Done verifying results in %s: %s",
                         exp_output_root,
                         sec)

    def _verify_exp_outputs_pairwise(self,
                                     csv_root1: pathlib.Path,
                                     csv_root2: pathlib.Path) -> None:
        """Verify the metric directories of two runs agree with each other.

        Args:
            csv_root1: Metrics directory of the first run.

            csv_root2: Metrics directory of the second run.

        Raises:
            AssertionError: If a file exists in only one of the directories,
                            or a pair of same-named files differs in its
                            columns or # rows.
        """
        # Consider every name present in EITHER directory, so that a file
        # missing from one side is caught regardless of argument order.
        names = {entry.name for entry in csv_root1.iterdir()}
        names.update(entry.name for entry in csv_root2.iterdir())

        reader = storage.DataFrameReader(self.gather_opts['storage_medium'])

        for name in sorted(names):
            path1 = csv_root1 / name
            path2 = csv_root2 / name

            # .csvs for rendering that we don't verify (for now...)
            if path1.is_dir() or path2.is_dir():
                self.logger.debug("Not verifying '%s': contains rendering data",
                                  str(path1))
                continue

            assert (utils.path_exists(path1) and utils.path_exists(path2)), \
                f"Either {path1} or {path2} does not exist"

            # Verify both dataframes have same # columns, and that
            # column sets are identical
            df1 = reader(path1)
            df2 = reader(path2)

            assert len(df1.columns) == len(df2.columns), \
                (f"Dataframes from {path1} and {path2} do not have "
                 "the same # columns")
            assert sorted(df1.columns) == sorted(df2.columns), \
                f"Columns from {path1} and {path2} not identical"

            # Verify the length of all columns in both dataframes is the
            # same. All columns within one dataframe share its index, so
            # comparing the two indices covers every column.
            assert len(df1.index) == len(df2.index), \
                (f"Not all columns from {path1} and {path2} have "
                 "the same length")
399
-
400
-
401
class ExpStatisticsCalculator:
    """Generate statistics from output files for all runs within an experiment.

    .. IMPORTANT:: You *CANNOT* use logging ANYWHERE during processing .csv
       files. The suspected cause: if the process which enters the
       __call__() method in this class is spawned while another logging
       statement is in progress (and is therefore holding an internal
       logging module lock), then the underlying fork() call will copy the
       lock in the acquired state. Then, when this class goes to try to log
       something, it deadlocks with itself.

       You also can't just create loggers with unique names, as the lock
       appears to be shared across the logging module rather than being
       per-logger.
    """

    def __init__(self,
                 main_config: types.YAMLDict,
                 avg_opts: dict,
                 batch_stat_root: pathlib.Path) -> None:
        self.avg_opts = avg_opts

        # Leaf name of the template input file, exactly as supplied in
        # avg_opts (no directory component).
        self.template_input_fname = self.avg_opts['template_input_leaf']

        self.main_config = main_config
        self.batch_stat_root = batch_stat_root

        self.intra_perf_csv = main_config['sierra']['perf']['intra_perf_csv']
        self.intra_perf_col = main_config['sierra']['perf']['intra_perf_col']

    def __call__(self,
                 gather_spec: GatherSpec,
                 gathered_dfs: tp.List[pd.DataFrame]) -> None:
        """Compute and write the statistics for one gathered item.

        The dataframes are concatenated and grouped by row index, so each
        statistic is computed per row across the runs they came from.

        Args:
            gather_spec: Identifies the experiment and file the dataframes
                         came from; determines the output path.

            gathered_dfs: The dataframes gathered for ``gather_spec``, one
                          per run.
        """
        csv_concat = pd.concat(gathered_dfs)

        exp_stat_root = self.batch_stat_root / gather_spec.exp_name
        utils.dir_create_checked(exp_stat_root, exist_ok=True)

        # Create directory for averaged .csv files for imagizing later.
        if gather_spec.for_imagizing():
            utils.dir_create_checked(exp_stat_root / gather_spec.item_stem,
                                     exist_ok=True)

        by_row_index = csv_concat.groupby(csv_concat.index)

        # Maps output file extension -> dataframe to write with it. The mean
        # is produced when no distribution statistics were requested as well
        # as when all of them were.
        dfs = {}
        if self.avg_opts['dist_stats'] in ['none', 'all']:
            dfs.update(stat_kernels.mean.from_groupby(by_row_index))

        if self.avg_opts['dist_stats'] in ['conf95', 'all']:
            dfs.update(stat_kernels.conf95.from_groupby(by_row_index))

        if self.avg_opts['dist_stats'] in ['bw', 'all']:
            dfs.update(stat_kernels.bw.from_groupby(by_row_index))

        for ext, stat_df in dfs.items():
            # Build the file name by appending the extension to the stem.
            # Do NOT use Path.with_suffix() here: it would replace
            # everything after the last '.' in a stem which itself contains
            # a dot ('foo.bar' -> 'foo.mean'), and different items could
            # then overwrite each other's outputs.
            # NOTE(review): assumes each extension carries its own leading
            # '.', as with_suffix() previously required -- confirm against
            # stat_kernels.
            if gather_spec.for_imagizing():
                opath = exp_stat_root / gather_spec.item_stem / \
                    (gather_spec.imagize_csv_stem + ext)
            else:
                opath = exp_stat_root / (gather_spec.item_stem + ext)

            df = utils.df_fill(stat_df, self.avg_opts['df_homogenize'])
            writer = storage.DataFrameWriter(self.avg_opts['storage_medium'])
            writer(df, opath, index=False)
471
-
472
-
473
# Names which make up the documented public API of this module.
__api__ = [
    "GatherSpec",
    "BatchExpParallelCalculator",
    "ExpCSVGatherer",
    "ExpStatisticsCalculator",
]