sierra-research 1.3.11__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sierra/__init__.py +3 -3
- sierra/core/__init__.py +3 -3
- sierra/core/batchroot.py +223 -0
- sierra/core/cmdline.py +681 -1057
- sierra/core/compare.py +11 -0
- sierra/core/config.py +96 -88
- sierra/core/engine.py +306 -0
- sierra/core/execenv.py +380 -0
- sierra/core/expdef.py +11 -0
- sierra/core/experiment/__init__.py +1 -0
- sierra/core/experiment/bindings.py +150 -101
- sierra/core/experiment/definition.py +414 -245
- sierra/core/experiment/spec.py +83 -85
- sierra/core/exproot.py +44 -0
- sierra/core/generators/__init__.py +10 -0
- sierra/core/generators/experiment.py +528 -0
- sierra/core/generators/generator_factory.py +138 -137
- sierra/core/graphs/__init__.py +23 -0
- sierra/core/graphs/bcbridge.py +94 -0
- sierra/core/graphs/heatmap.py +245 -324
- sierra/core/graphs/pathset.py +27 -0
- sierra/core/graphs/schema.py +77 -0
- sierra/core/graphs/stacked_line.py +341 -0
- sierra/core/graphs/summary_line.py +506 -0
- sierra/core/logging.py +3 -2
- sierra/core/models/__init__.py +3 -1
- sierra/core/models/info.py +19 -0
- sierra/core/models/interface.py +52 -122
- sierra/core/pipeline/__init__.py +2 -5
- sierra/core/pipeline/pipeline.py +228 -126
- sierra/core/pipeline/stage1/__init__.py +10 -0
- sierra/core/pipeline/stage1/pipeline_stage1.py +45 -31
- sierra/core/pipeline/stage2/__init__.py +10 -0
- sierra/core/pipeline/stage2/pipeline_stage2.py +8 -11
- sierra/core/pipeline/stage2/runner.py +401 -0
- sierra/core/pipeline/stage3/__init__.py +12 -0
- sierra/core/pipeline/stage3/gather.py +321 -0
- sierra/core/pipeline/stage3/pipeline_stage3.py +37 -84
- sierra/core/pipeline/stage4/__init__.py +12 -2
- sierra/core/pipeline/stage4/pipeline_stage4.py +36 -354
- sierra/core/pipeline/stage5/__init__.py +12 -0
- sierra/core/pipeline/stage5/pipeline_stage5.py +33 -208
- sierra/core/pipeline/yaml.py +48 -0
- sierra/core/plugin.py +529 -62
- sierra/core/proc.py +11 -0
- sierra/core/prod.py +11 -0
- sierra/core/ros1/__init__.py +5 -1
- sierra/core/ros1/callbacks.py +22 -21
- sierra/core/ros1/cmdline.py +59 -88
- sierra/core/ros1/generators.py +159 -175
- sierra/core/ros1/variables/__init__.py +3 -0
- sierra/core/ros1/variables/exp_setup.py +122 -116
- sierra/core/startup.py +106 -76
- sierra/core/stat_kernels.py +4 -5
- sierra/core/storage.py +13 -32
- sierra/core/trampoline.py +30 -0
- sierra/core/types.py +116 -71
- sierra/core/utils.py +103 -106
- sierra/core/variables/__init__.py +1 -1
- sierra/core/variables/base_variable.py +12 -17
- sierra/core/variables/batch_criteria.py +387 -481
- sierra/core/variables/builtin.py +135 -0
- sierra/core/variables/exp_setup.py +19 -39
- sierra/core/variables/population_size.py +72 -76
- sierra/core/variables/variable_density.py +44 -68
- sierra/core/vector.py +1 -1
- sierra/main.py +256 -88
- sierra/plugins/__init__.py +119 -0
- sierra/plugins/compare/__init__.py +14 -0
- sierra/plugins/compare/graphs/__init__.py +19 -0
- sierra/plugins/compare/graphs/cmdline.py +120 -0
- sierra/plugins/compare/graphs/comparator.py +291 -0
- sierra/plugins/compare/graphs/inter_controller.py +531 -0
- sierra/plugins/compare/graphs/inter_scenario.py +297 -0
- sierra/plugins/compare/graphs/namecalc.py +53 -0
- sierra/plugins/compare/graphs/outputroot.py +73 -0
- sierra/plugins/compare/graphs/plugin.py +147 -0
- sierra/plugins/compare/graphs/preprocess.py +172 -0
- sierra/plugins/compare/graphs/schema.py +37 -0
- sierra/plugins/engine/__init__.py +14 -0
- sierra/plugins/engine/argos/__init__.py +18 -0
- sierra/plugins/{platform → engine}/argos/cmdline.py +144 -151
- sierra/plugins/{platform/argos/variables → engine/argos/generators}/__init__.py +5 -0
- sierra/plugins/engine/argos/generators/engine.py +394 -0
- sierra/plugins/engine/argos/plugin.py +393 -0
- sierra/plugins/{platform/argos/generators → engine/argos/variables}/__init__.py +5 -0
- sierra/plugins/engine/argos/variables/arena_shape.py +183 -0
- sierra/plugins/engine/argos/variables/cameras.py +240 -0
- sierra/plugins/engine/argos/variables/constant_density.py +112 -0
- sierra/plugins/engine/argos/variables/exp_setup.py +82 -0
- sierra/plugins/{platform → engine}/argos/variables/physics_engines.py +83 -87
- sierra/plugins/engine/argos/variables/population_constant_density.py +178 -0
- sierra/plugins/engine/argos/variables/population_size.py +115 -0
- sierra/plugins/engine/argos/variables/population_variable_density.py +123 -0
- sierra/plugins/engine/argos/variables/rendering.py +108 -0
- sierra/plugins/engine/ros1gazebo/__init__.py +18 -0
- sierra/plugins/engine/ros1gazebo/cmdline.py +175 -0
- sierra/plugins/{platform/ros1robot → engine/ros1gazebo}/generators/__init__.py +5 -0
- sierra/plugins/engine/ros1gazebo/generators/engine.py +125 -0
- sierra/plugins/engine/ros1gazebo/plugin.py +404 -0
- sierra/plugins/engine/ros1gazebo/variables/__init__.py +15 -0
- sierra/plugins/engine/ros1gazebo/variables/population_size.py +214 -0
- sierra/plugins/engine/ros1robot/__init__.py +18 -0
- sierra/plugins/engine/ros1robot/cmdline.py +159 -0
- sierra/plugins/{platform/ros1gazebo → engine/ros1robot}/generators/__init__.py +4 -0
- sierra/plugins/engine/ros1robot/generators/engine.py +95 -0
- sierra/plugins/engine/ros1robot/plugin.py +410 -0
- sierra/plugins/{hpc/local → engine/ros1robot/variables}/__init__.py +5 -0
- sierra/plugins/engine/ros1robot/variables/population_size.py +146 -0
- sierra/plugins/execenv/__init__.py +11 -0
- sierra/plugins/execenv/hpc/__init__.py +18 -0
- sierra/plugins/execenv/hpc/adhoc/__init__.py +18 -0
- sierra/plugins/execenv/hpc/adhoc/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/adhoc/plugin.py +131 -0
- sierra/plugins/execenv/hpc/cmdline.py +137 -0
- sierra/plugins/execenv/hpc/local/__init__.py +18 -0
- sierra/plugins/execenv/hpc/local/cmdline.py +31 -0
- sierra/plugins/execenv/hpc/local/plugin.py +145 -0
- sierra/plugins/execenv/hpc/pbs/__init__.py +18 -0
- sierra/plugins/execenv/hpc/pbs/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/pbs/plugin.py +121 -0
- sierra/plugins/execenv/hpc/slurm/__init__.py +18 -0
- sierra/plugins/execenv/hpc/slurm/cmdline.py +30 -0
- sierra/plugins/execenv/hpc/slurm/plugin.py +133 -0
- sierra/plugins/execenv/prefectserver/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/cmdline.py +66 -0
- sierra/plugins/execenv/prefectserver/dockerremote/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/dockerremote/cmdline.py +66 -0
- sierra/plugins/execenv/prefectserver/dockerremote/plugin.py +132 -0
- sierra/plugins/execenv/prefectserver/flow.py +66 -0
- sierra/plugins/execenv/prefectserver/local/__init__.py +18 -0
- sierra/plugins/execenv/prefectserver/local/cmdline.py +29 -0
- sierra/plugins/execenv/prefectserver/local/plugin.py +133 -0
- sierra/plugins/{hpc/adhoc → execenv/robot}/__init__.py +1 -0
- sierra/plugins/execenv/robot/turtlebot3/__init__.py +18 -0
- sierra/plugins/execenv/robot/turtlebot3/plugin.py +204 -0
- sierra/plugins/expdef/__init__.py +14 -0
- sierra/plugins/expdef/json/__init__.py +14 -0
- sierra/plugins/expdef/json/plugin.py +504 -0
- sierra/plugins/expdef/xml/__init__.py +14 -0
- sierra/plugins/expdef/xml/plugin.py +386 -0
- sierra/{core/hpc → plugins/proc}/__init__.py +1 -1
- sierra/plugins/proc/collate/__init__.py +15 -0
- sierra/plugins/proc/collate/cmdline.py +47 -0
- sierra/plugins/proc/collate/plugin.py +271 -0
- sierra/plugins/proc/compress/__init__.py +18 -0
- sierra/plugins/proc/compress/cmdline.py +47 -0
- sierra/plugins/proc/compress/plugin.py +123 -0
- sierra/plugins/proc/decompress/__init__.py +18 -0
- sierra/plugins/proc/decompress/plugin.py +96 -0
- sierra/plugins/proc/imagize/__init__.py +15 -0
- sierra/plugins/proc/imagize/cmdline.py +49 -0
- sierra/plugins/proc/imagize/plugin.py +270 -0
- sierra/plugins/proc/modelrunner/__init__.py +16 -0
- sierra/plugins/proc/modelrunner/plugin.py +250 -0
- sierra/plugins/proc/statistics/__init__.py +15 -0
- sierra/plugins/proc/statistics/cmdline.py +64 -0
- sierra/plugins/proc/statistics/plugin.py +390 -0
- sierra/plugins/{hpc → prod}/__init__.py +1 -0
- sierra/plugins/prod/graphs/__init__.py +18 -0
- sierra/plugins/prod/graphs/cmdline.py +269 -0
- sierra/plugins/prod/graphs/collate.py +279 -0
- sierra/plugins/prod/graphs/inter/__init__.py +13 -0
- sierra/plugins/prod/graphs/inter/generate.py +83 -0
- sierra/plugins/prod/graphs/inter/heatmap.py +86 -0
- sierra/plugins/prod/graphs/inter/line.py +134 -0
- sierra/plugins/prod/graphs/intra/__init__.py +15 -0
- sierra/plugins/prod/graphs/intra/generate.py +202 -0
- sierra/plugins/prod/graphs/intra/heatmap.py +74 -0
- sierra/plugins/prod/graphs/intra/line.py +114 -0
- sierra/plugins/prod/graphs/plugin.py +103 -0
- sierra/plugins/prod/graphs/targets.py +63 -0
- sierra/plugins/prod/render/__init__.py +18 -0
- sierra/plugins/prod/render/cmdline.py +72 -0
- sierra/plugins/prod/render/plugin.py +282 -0
- sierra/plugins/storage/__init__.py +5 -0
- sierra/plugins/storage/arrow/__init__.py +18 -0
- sierra/plugins/storage/arrow/plugin.py +38 -0
- sierra/plugins/storage/csv/__init__.py +9 -0
- sierra/plugins/storage/csv/plugin.py +12 -5
- sierra/version.py +3 -2
- sierra_research-1.5.0.dist-info/METADATA +238 -0
- sierra_research-1.5.0.dist-info/RECORD +186 -0
- {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info}/WHEEL +1 -2
- sierra/core/experiment/xml.py +0 -454
- sierra/core/generators/controller_generator_parser.py +0 -34
- sierra/core/generators/exp_creator.py +0 -351
- sierra/core/generators/exp_generators.py +0 -142
- sierra/core/graphs/scatterplot2D.py +0 -109
- sierra/core/graphs/stacked_line_graph.py +0 -251
- sierra/core/graphs/stacked_surface_graph.py +0 -220
- sierra/core/graphs/summary_line_graph.py +0 -371
- sierra/core/hpc/cmdline.py +0 -142
- sierra/core/models/graphs.py +0 -87
- sierra/core/pipeline/stage2/exp_runner.py +0 -286
- sierra/core/pipeline/stage3/imagizer.py +0 -149
- sierra/core/pipeline/stage3/run_collator.py +0 -317
- sierra/core/pipeline/stage3/statistics_calculator.py +0 -478
- sierra/core/pipeline/stage4/graph_collator.py +0 -320
- sierra/core/pipeline/stage4/inter_exp_graph_generator.py +0 -240
- sierra/core/pipeline/stage4/intra_exp_graph_generator.py +0 -317
- sierra/core/pipeline/stage4/model_runner.py +0 -168
- sierra/core/pipeline/stage4/rendering.py +0 -283
- sierra/core/pipeline/stage4/yaml_config_loader.py +0 -103
- sierra/core/pipeline/stage5/inter_scenario_comparator.py +0 -328
- sierra/core/pipeline/stage5/intra_scenario_comparator.py +0 -989
- sierra/core/platform.py +0 -493
- sierra/core/plugin_manager.py +0 -369
- sierra/core/root_dirpath_generator.py +0 -241
- sierra/plugins/hpc/adhoc/plugin.py +0 -125
- sierra/plugins/hpc/local/plugin.py +0 -81
- sierra/plugins/hpc/pbs/__init__.py +0 -9
- sierra/plugins/hpc/pbs/plugin.py +0 -126
- sierra/plugins/hpc/slurm/__init__.py +0 -9
- sierra/plugins/hpc/slurm/plugin.py +0 -130
- sierra/plugins/platform/__init__.py +0 -9
- sierra/plugins/platform/argos/__init__.py +0 -9
- sierra/plugins/platform/argos/generators/platform_generators.py +0 -383
- sierra/plugins/platform/argos/plugin.py +0 -337
- sierra/plugins/platform/argos/variables/arena_shape.py +0 -145
- sierra/plugins/platform/argos/variables/cameras.py +0 -243
- sierra/plugins/platform/argos/variables/constant_density.py +0 -136
- sierra/plugins/platform/argos/variables/exp_setup.py +0 -113
- sierra/plugins/platform/argos/variables/population_constant_density.py +0 -175
- sierra/plugins/platform/argos/variables/population_size.py +0 -102
- sierra/plugins/platform/argos/variables/population_variable_density.py +0 -132
- sierra/plugins/platform/argos/variables/rendering.py +0 -104
- sierra/plugins/platform/ros1gazebo/__init__.py +0 -9
- sierra/plugins/platform/ros1gazebo/cmdline.py +0 -213
- sierra/plugins/platform/ros1gazebo/generators/platform_generators.py +0 -137
- sierra/plugins/platform/ros1gazebo/plugin.py +0 -335
- sierra/plugins/platform/ros1gazebo/variables/__init__.py +0 -10
- sierra/plugins/platform/ros1gazebo/variables/population_size.py +0 -204
- sierra/plugins/platform/ros1robot/__init__.py +0 -9
- sierra/plugins/platform/ros1robot/cmdline.py +0 -175
- sierra/plugins/platform/ros1robot/generators/platform_generators.py +0 -112
- sierra/plugins/platform/ros1robot/plugin.py +0 -373
- sierra/plugins/platform/ros1robot/variables/__init__.py +0 -10
- sierra/plugins/platform/ros1robot/variables/population_size.py +0 -146
- sierra/plugins/robot/__init__.py +0 -9
- sierra/plugins/robot/turtlebot3/__init__.py +0 -9
- sierra/plugins/robot/turtlebot3/plugin.py +0 -194
- sierra_research-1.3.11.data/data/share/man/man1/sierra-cli.1 +0 -2349
- sierra_research-1.3.11.data/data/share/man/man7/sierra-examples.7 +0 -508
- sierra_research-1.3.11.data/data/share/man/man7/sierra-exec-envs.7 +0 -331
- sierra_research-1.3.11.data/data/share/man/man7/sierra-glossary.7 +0 -285
- sierra_research-1.3.11.data/data/share/man/man7/sierra-platforms.7 +0 -358
- sierra_research-1.3.11.data/data/share/man/man7/sierra-usage.7 +0 -729
- sierra_research-1.3.11.data/data/share/man/man7/sierra.7 +0 -78
- sierra_research-1.3.11.dist-info/METADATA +0 -492
- sierra_research-1.3.11.dist-info/RECORD +0 -133
- sierra_research-1.3.11.dist-info/top_level.txt +0 -1
- {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info}/entry_points.txt +0 -0
- {sierra_research-1.3.11.dist-info → sierra_research-1.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,478 +0,0 @@
|
|
1
|
-
# Copyright 2019 John Harwell, All rights reserved.
|
2
|
-
#
|
3
|
-
# SPDX-License-Identifier: MIT
|
4
|
-
|
5
|
-
"""
|
6
|
-
Classes for generating statistics within and across experiments in a batch.
|
7
|
-
"""
|
8
|
-
|
9
|
-
# Core packages
|
10
|
-
import re
|
11
|
-
import multiprocessing as mp
|
12
|
-
import typing as tp
|
13
|
-
import queue
|
14
|
-
import time
|
15
|
-
import datetime
|
16
|
-
import logging
|
17
|
-
import pathlib
|
18
|
-
|
19
|
-
# 3rd party packages
|
20
|
-
import pandas as pd
|
21
|
-
import psutil
|
22
|
-
|
23
|
-
# Project packages
|
24
|
-
import sierra.core.variables.batch_criteria as bc
|
25
|
-
from sierra.core import types, utils, stat_kernels, storage, config
|
26
|
-
|
27
|
-
|
28
|
-
class GatherSpec:
    """
    Data class for specifying .csv files to gather from an :term:`Experiment`.

    Specs are used as dictionary keys and are pickled across process
    boundaries, so equality and hashing are defined by value rather than by
    object identity. Do not mutate a spec while it is used as a key.

    Attributes:

        exp_name: Name of the experiment the item was gathered from.

        item_stem: Stem of the .csv file to gather, or of the directory holding
                   the .csv files when gathering for imagizing.

        imagize_csv_stem: Stem of a single .csv file inside the ``item_stem``
                          directory; ``None`` if the item is not for imagizing.
    """

    def __init__(self,
                 exp_name: str,
                 item_stem: str,
                 imagize_csv_stem: tp.Optional[str]):
        self.exp_name = exp_name
        self.item_stem = item_stem
        self.imagize_csv_stem = imagize_csv_stem

    def _as_tuple(self) -> tuple:
        # Single source of truth for __eq__/__hash__/__repr__.
        return (self.exp_name, self.item_stem, self.imagize_csv_stem)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, GatherSpec):
            return NotImplemented
        return self._as_tuple() == other._as_tuple()

    def __hash__(self) -> int:
        return hash(self._as_tuple())

    def __repr__(self) -> str:
        return (f"{type(self).__name__}(exp_name={self.exp_name!r}, "
                f"item_stem={self.item_stem!r}, "
                f"imagize_csv_stem={self.imagize_csv_stem!r})")

    def for_imagizing(self) -> bool:
        """Return True if this spec names a .csv inside an imagizing directory."""
        return self.imagize_csv_stem is not None
|
43
|
-
|
44
|
-
|
45
|
-
class BatchExpParallelCalculator:
    """Process :term:`Output .csv` files for each experiment in the batch.

    In parallel for speed. "Gatherer" workers read the outputs of the runs in
    each experiment and put them on a process queue; "processor" workers take
    them off that queue and compute + write the statistics.
    """

    # Sentinel put on the process queue (once per processor) after all
    # gatherers have finished, telling a processor that no more work is coming.
    _kStopProcessing = None

    def __init__(self, main_config: dict, cmdopts: types.Cmdopts):
        self.main_config = main_config
        self.cmdopts = cmdopts
        self.logger = logging.getLogger(__name__)

    def __call__(self, criteria: bc.IConcreteBatchCriteria) -> None:
        """Gather and process outputs for the experiments computed from ``criteria``."""
        exp_to_avg = utils.exp_range_calc(self.cmdopts,
                                          self.cmdopts['batch_output_root'],
                                          criteria)

        template_input_leaf = pathlib.Path(self.cmdopts['template_input_file']).stem

        # Only the options the workers need; passed to every worker process.
        avg_opts = {
            'template_input_leaf': template_input_leaf,
            'df_skip_verify': self.cmdopts['df_skip_verify'],
            'dist_stats': self.cmdopts['dist_stats'],
            'project_imagizing': self.cmdopts['project_imagizing'],
            'processing_mem_limit': self.cmdopts['processing_mem_limit'],
            'storage_medium': self.cmdopts['storage_medium'],
            'df_homogenize': self.cmdopts['df_homogenize']
        }

        if self.cmdopts['processing_serial']:
            n_gatherers = 1
            n_processors = 1
        else:
            # Always need to have at least one of each! If SIERRA is invoked on
            # a machine with fewer than 4 logical cores, the 25% share for
            # gatherers truncates to 0. psutil.cpu_count() can also return None
            # if the count cannot be determined.
            n_cpus = psutil.cpu_count() or 1
            n_gatherers = max(1, int(n_cpus * 0.25))
            n_processors = max(1, int(n_cpus * 0.75))

        with mp.Pool(processes=n_gatherers + n_processors) as pool:
            self._execute(exp_to_avg, avg_opts, n_gatherers, n_processors, pool)

    def _execute(self,
                 exp_to_avg: tp.List[pathlib.Path],
                 avg_opts: types.SimpleDict,
                 n_gatherers: int,
                 n_processors: int,
                 pool) -> None:
        """Run the gather and process workers in ``pool`` and wait for them.

        ``pool`` must have at least ``n_gatherers + n_processors`` processes so
        that all workers run concurrently; processors block until gatherers
        produce work.
        """
        # The manager owns a server process; the context manager shuts it down
        # even if a worker raises.
        with mp.Manager() as m:
            gatherq = m.Queue()
            processq = m.Queue()

            for exp in exp_to_avg:
                gatherq.put(exp)

            # Start some threads gathering .csvs first to get things rolling.
            self.logger.debug("Starting %d gatherers, method=%s",
                              n_gatherers,
                              mp.get_start_method())
            gathered = [pool.apply_async(BatchExpParallelCalculator._gather_worker,
                                         (gatherq,
                                          processq,
                                          self.main_config,
                                          avg_opts)) for _ in range(n_gatherers)]

            self.logger.debug("Starting %d processors, method=%s",
                              n_processors,
                              mp.get_start_method())
            processed = [pool.apply_async(BatchExpParallelCalculator._process_worker,
                                          (processq,
                                           self.main_config,
                                           self.cmdopts['batch_stat_root'],
                                           avg_opts)) for _ in range(n_processors)]

            # To capture the otherwise silent crashes when something goes wrong
            # in worker threads. Any assertions will show and any exceptions
            # will be re-raised.
            self.logger.debug("Waiting for workers to finish")

            try:
                for g in gathered:
                    g.get()
            finally:
                # All gathered items are on the queue ahead of the sentinels,
                # so processors finish the real work before stopping. Sent
                # even on gatherer failure so processors do not block forever.
                for _ in range(n_processors):
                    processq.put(BatchExpParallelCalculator._kStopProcessing)

            for p in processed:
                p.get()

            pool.close()
            pool.join()
            self.logger.debug("All threads finished")

    @staticmethod
    def _gather_worker(gatherq: mp.Queue,
                       processq: mp.Queue,
                       main_config: types.YAMLDict,
                       avg_opts: tp.Dict[str, str]) -> None:
        """Gather experiments taken from ``gatherq`` until it stays empty."""
        gatherer = ExpCSVGatherer(main_config, avg_opts, processq)

        # Wait for 3 seconds after the queue is empty before bailing, at the
        # start. If that is not long enough then exponentially increase from
        # there until you find how long it takes to get the first item in the
        # queue, and use that as the appropriate timeout (plus a little
        # margin).
        timeout = 3
        got_item = False
        n_tries = 0
        while n_tries < 2:
            # Only the queue read is guarded, so a queue.Empty escaping from
            # the gatherer itself is never mistaken for "no more work".
            try:
                exp_output_root = gatherq.get(True, timeout)
            except queue.Empty:
                if got_item:
                    break

                timeout *= 2
                n_tries += 1
                continue

            gatherer(exp_output_root)
            gatherq.task_done()
            got_item = True

    @staticmethod
    def _process_worker(processq: mp.Queue,
                        main_config: types.YAMLDict,
                        batch_stat_root: pathlib.Path,
                        avg_opts: tp.Dict[str, str]) -> None:
        """Compute statistics for items from ``processq`` until told to stop.

        Each item is a dict mapping a :class:`GatherSpec` to the list of
        dataframes gathered for it. The worker blocks until an item arrives and
        exits when it receives the stop sentinel. It does not exit on a
        timeout: gatherers can be arbitrarily slow, and quitting early would
        silently drop work that was enqueued afterwards.
        """
        calculator = ExpStatisticsCalculator(main_config,
                                             avg_opts,
                                             batch_stat_root)

        while True:
            item = processq.get()

            if item is BatchExpParallelCalculator._kStopProcessing:
                processq.task_done()
                return

            for spec, dfs in item.items():
                calculator(spec, dfs)

            processq.task_done()
|
196
|
-
|
197
|
-
|
198
|
-
class ExpCSVGatherer:
    """Gather all :term:`Output .csv` files from all runs within an experiment.

    "Gathering" in this context means creating a dictionary mapping which .csv
    came from where, so that statistics can be generated both across and within
    experiments in the batch.
    """

    def __init__(self,
                 main_config: types.YAMLDict,
                 gather_opts: dict,
                 processq: mp.Queue) -> None:
        self.processq = processq
        self.gather_opts = gather_opts

        # Stem of the template input file (no directory, no extension), as
        # computed by the caller.
        self.template_input_fname = self.gather_opts['template_input_leaf']

        self.main_config = main_config

        self.run_metrics_leaf = main_config['sierra']['run']['run_metrics_leaf']
        self.videos_leaf = 'videos'
        self.project_imagize = gather_opts['project_imagizing']

        self.logger = logging.getLogger(__name__)

    def __call__(self, exp_output_root: pathlib.Path) -> None:
        """Gather the .csv files of every run under ``exp_output_root``.

        One dict per gathered item is put on the process queue.
        """
        if not self.gather_opts['df_skip_verify']:
            self._verify_exp_outputs(exp_output_root)

        self.logger.info('Processing .csvs: %s...', exp_output_root.name)

        pattern = "{}_run{}_output".format(re.escape(self.gather_opts['template_input_leaf']),
                                           r'\d+')

        # Sorted so that the run used to decide what to gather (and the order
        # in which dataframes are gathered) is deterministic.
        runs = sorted(exp_output_root.iterdir())
        assert runs, f"No experimental runs found in '{exp_output_root}'"
        assert (all(re.match(pattern, r.name) for r in runs)), \
            f"Extra files/not all dirs in '{exp_output_root}' are exp runs"

        # The first run determines which items are gathered from all runs.
        to_gather = self._calc_gather_items(runs[0], exp_output_root.name)

        for item in to_gather:
            self._wait_for_memory()
            gathered = self._gather_item_from_sims(exp_output_root, item, runs)

            # Put gathered .csv list in the process queue
            self.processq.put(gathered)

        self.logger.debug("Enqueued %s items from %s for processing",
                          len(to_gather),
                          exp_output_root.name)

    def _calc_gather_items(self,
                           run_output_root: pathlib.Path,
                           exp_name: str) -> tp.List[GatherSpec]:
        """Build the list of items to gather from the metrics dir of one run."""
        to_gather = []

        sim_output_root = run_output_root / self.run_metrics_leaf

        # The metrics folder should contain nothing but .csv files and
        # directories. For all directories it contains, they each should contain
        # nothing but .csv files (these are for video rendering later).
        for item in sim_output_root.iterdir():
            csv_stem = item.stem

            if item.is_file():
                to_gather.append(GatherSpec(exp_name=exp_name,
                                            item_stem=csv_stem,
                                            imagize_csv_stem=None))
                continue

            # This takes FOREVER, so only do it if we absolutely need to
            if not self.project_imagize:
                continue

            for csv_fname in item.iterdir():
                to_gather.append(GatherSpec(exp_name=exp_name,
                                            item_stem=csv_stem,
                                            imagize_csv_stem=csv_fname.stem))

        return to_gather

    def _gather_item_from_sims(self,
                               exp_output_root: pathlib.Path,
                               item: GatherSpec,
                               runs: tp.List[pathlib.Path]) -> tp.Dict[GatherSpec,
                                                                       tp.List[pd.DataFrame]]:
        """Read the dataframe for ``item`` from every run.

        Returns a single-entry dict mapping ``item`` to the list of dataframes,
        one per run, in the order of ``runs``.
        """
        reader = storage.DataFrameReader(self.gather_opts['storage_medium'])
        dfs = []  # type: tp.List[pd.DataFrame]

        for run in runs:
            sim_output_root = run / self.run_metrics_leaf

            if item.for_imagizing():
                item_path = sim_output_root / item.item_stem / \
                    (item.imagize_csv_stem + config.kStorageExt['csv'])
            else:
                item_path = sim_output_root / \
                    (item.item_stem + config.kStorageExt['csv'])

            df = reader(item_path, index_col=False)

            # If the first column was read as generic objects, coerce it to
            # floats.
            if df.dtypes.iloc[0] == 'object':
                df[df.columns[0]] = df[df.columns[0]].apply(float)

            dfs.append(df)

        return {item: dfs}

    def _wait_for_memory(self) -> None:
        """Block until enough system memory is available.

        Polls once per second until the percentage of available memory is at
        least ``100 - processing_mem_limit``.
        """
        while True:
            mem = psutil.virtual_memory()
            avail = mem.available / mem.total
            free_percent = avail * 100
            free_limit = 100 - self.gather_opts['processing_mem_limit']

            if free_percent >= free_limit:
                return

            self.logger.info("Waiting for memory: avail=%s,min=%s",
                             free_percent,
                             free_limit)
            time.sleep(1)

    def _verify_exp_outputs(self, exp_output_root: pathlib.Path) -> None:
        """
        Verify the integrity of all runs in an experiment.

        Specifically:

        - All runs produced all CSV files.

        - All runs CSV files with the same name have the same # rows and
          columns.

        Every run is compared against the first run (in sorted order) which has
        a metrics directory; runs without one are skipped. NaNs are not checked
        for.
        """
        self.logger.info('Verifying results in %s...', str(exp_output_root))

        start = time.time()

        # iterdir() returns a one-shot iterator, so materialize it before
        # using it more than once.
        csv_roots = [run / self.run_metrics_leaf
                     for run in sorted(exp_output_root.iterdir())]
        csv_roots = [root for root in csv_roots if root.is_dir()]

        # Equality of file sets/columns/row counts is transitive, so comparing
        # every run to a single reference is sufficient.
        if csv_roots:
            reference = csv_roots[0]
            for other in csv_roots[1:]:
                self._verify_exp_outputs_pairwise(reference, other)

        elapsed = int(time.time() - start)
        sec = datetime.timedelta(seconds=elapsed)
        self.logger.info("Done verifying results in %s: %s",
                         exp_output_root,
                         sec)

    def _verify_exp_outputs_pairwise(self,
                                     csv_root1: pathlib.Path,
                                     csv_root2: pathlib.Path) -> None:
        """Verify two metrics directories hold the same files with the same shape.

        Raises AssertionError if a file exists in only one of the directories,
        or if same-named files differ in their columns or # rows.
        """
        # Union of names so that a file missing from EITHER directory is
        # detected.
        names = {p.name for p in csv_root1.iterdir()}
        names.update(p.name for p in csv_root2.iterdir())

        reader = storage.DataFrameReader(self.gather_opts['storage_medium'])

        for name in sorted(names):
            path1 = csv_root1 / name
            path2 = csv_root2 / name

            # .csvs for rendering that we don't verify (for now...)
            if path1.is_dir() or path2.is_dir():
                self.logger.debug("Not verifying '%s': contains rendering data",
                                  str(path1))
                continue

            assert (utils.path_exists(path1) and utils.path_exists(path2)), \
                f"Either {path1} or {path2} does not exist"

            # Verify both dataframes have same # columns, and that
            # column sets are identical
            df1 = reader(path1)
            df2 = reader(path2)

            assert (len(df1.columns) == len(df2.columns)), \
                (f"Dataframes from {path1} and {path2} do not have "
                 "the same # columns")
            assert (sorted(df1.columns) == sorted(df2.columns)), \
                f"Columns from {path1} and {path2} not identical"

            # All columns within a dataframe have the same length, so comparing
            # the # rows is equivalent to comparing every pair of columns.
            assert (len(df1.index) == len(df2.index)), \
                (f"Not all columns from {path1} and {path2} have "
                 "the same length")
|
399
|
-
|
400
|
-
|
401
|
-
class ExpStatisticsCalculator:
    """Generate statistics from output files for all runs within an experiment.

    .. IMPORTANT:: You *CANNOT* use logging ANYWHERE during processing .csv
       files. Why ? I *think* because of a bug in the logging module itself. If
       you get unlucky enough to spawn the process which enters the __call__()
       method in this class while another logging statement is in progress (and
       is therefore holding an internal logging module lock), then the
       underlying fork() call will copy the lock in the acquired state. Then,
       when this class goes to try to log something, it deadlocks with itself.

       You also can't just create loggers with unique names, as this seems to be
       something like the GIL, but for the logging module. Sometimes python
       sucks.
    """

    def __init__(self,
                 main_config: types.YAMLDict,
                 avg_opts: dict,
                 batch_stat_root: pathlib.Path) -> None:
        self.avg_opts = avg_opts

        # Stem of the template input file (no directory, no extension), as
        # computed by the caller.
        self.template_input_fname = self.avg_opts['template_input_leaf']

        self.main_config = main_config
        self.batch_stat_root = batch_stat_root

        self.intra_perf_csv = main_config['sierra']['perf']['intra_perf_csv']
        self.intra_perf_col = main_config['sierra']['perf']['intra_perf_col']

    def __call__(self,
                 gather_spec: GatherSpec,
                 gathered_dfs: tp.List[pd.DataFrame]) -> None:
        """Compute and write the configured statistics for one gathered item.

        The dataframes are concatenated and grouped by row index; each selected
        statistics kernel yields one dataframe per output extension, which is
        written under ``<batch_stat_root>/<exp_name>/``.
        """
        csv_concat = pd.concat(gathered_dfs)

        exp_stat_root = self.batch_stat_root / gather_spec.exp_name
        utils.dir_create_checked(exp_stat_root, exist_ok=True)

        # Create directory for averaged .csv files for imagizing later.
        if gather_spec.for_imagizing():
            utils.dir_create_checked(exp_stat_root / gather_spec.item_stem,
                                     exist_ok=True)

        by_row_index = csv_concat.groupby(csv_concat.index)
        dist_stats = self.avg_opts['dist_stats']

        # Maps output file extension -> dataframe of that statistic.
        dfs = {}
        if dist_stats in ['none', 'all']:
            dfs.update(stat_kernels.mean.from_groupby(by_row_index))

        if dist_stats in ['conf95', 'all']:
            dfs.update(stat_kernels.conf95.from_groupby(by_row_index))

        if dist_stats in ['bw', 'all']:
            dfs.update(stat_kernels.bw.from_groupby(by_row_index))

        for ext, stat_df in dfs.items():
            if gather_spec.for_imagizing():
                opath = exp_stat_root / gather_spec.item_stem / \
                    (gather_spec.imagize_csv_stem + ext)
            else:
                # Append the extension rather than using with_suffix(): the
                # latter REPLACES anything after the last '.' in the stem, so
                # an item named 'a.b' would be written to 'a<ext>'.
                opath = exp_stat_root / (gather_spec.item_stem + ext)

            df = utils.df_fill(stat_df, self.avg_opts['df_homogenize'])
            writer = storage.DataFrameWriter(self.avg_opts['storage_medium'])
            writer(df, opath, index=False)
|
471
|
-
|
472
|
-
|
473
|
-
__api__ = [
|
474
|
-
'GatherSpec',
|
475
|
-
'BatchExpParallelCalculator',
|
476
|
-
'ExpCSVGatherer',
|
477
|
-
'ExpStatisticsCalculator'
|
478
|
-
]
|