idmtools 0.0.0.dev0__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- idmtools/__init__.py +27 -8
- idmtools/analysis/__init__.py +5 -0
- idmtools/analysis/add_analyzer.py +89 -0
- idmtools/analysis/analyze_manager.py +490 -0
- idmtools/analysis/csv_analyzer.py +103 -0
- idmtools/analysis/download_analyzer.py +96 -0
- idmtools/analysis/map_worker_entry.py +100 -0
- idmtools/analysis/platform_analysis_bootstrap.py +94 -0
- idmtools/analysis/platform_anaylsis.py +291 -0
- idmtools/analysis/tags_analyzer.py +93 -0
- idmtools/assets/__init__.py +9 -0
- idmtools/assets/asset.py +453 -0
- idmtools/assets/asset_collection.py +514 -0
- idmtools/assets/content_handlers.py +19 -0
- idmtools/assets/errors.py +23 -0
- idmtools/assets/file_list.py +191 -0
- idmtools/builders/__init__.py +11 -0
- idmtools/builders/arm_simulation_builder.py +152 -0
- idmtools/builders/csv_simulation_builder.py +76 -0
- idmtools/builders/simulation_builder.py +348 -0
- idmtools/builders/sweep_arm.py +109 -0
- idmtools/builders/yaml_simulation_builder.py +82 -0
- idmtools/config/__init__.py +7 -0
- idmtools/config/idm_config_parser.py +486 -0
- idmtools/core/__init__.py +10 -0
- idmtools/core/cache_enabled.py +114 -0
- idmtools/core/context.py +68 -0
- idmtools/core/docker_task.py +207 -0
- idmtools/core/enums.py +51 -0
- idmtools/core/exceptions.py +91 -0
- idmtools/core/experiment_factory.py +71 -0
- idmtools/core/id_file.py +70 -0
- idmtools/core/interfaces/__init__.py +5 -0
- idmtools/core/interfaces/entity_container.py +64 -0
- idmtools/core/interfaces/iassets_enabled.py +58 -0
- idmtools/core/interfaces/ientity.py +331 -0
- idmtools/core/interfaces/iitem.py +206 -0
- idmtools/core/interfaces/imetadata_operations.py +89 -0
- idmtools/core/interfaces/inamed_entity.py +17 -0
- idmtools/core/interfaces/irunnable_entity.py +159 -0
- idmtools/core/logging.py +387 -0
- idmtools/core/platform_factory.py +316 -0
- idmtools/core/system_information.py +104 -0
- idmtools/core/task_factory.py +145 -0
- idmtools/entities/__init__.py +10 -0
- idmtools/entities/command_line.py +229 -0
- idmtools/entities/command_task.py +155 -0
- idmtools/entities/experiment.py +787 -0
- idmtools/entities/generic_workitem.py +43 -0
- idmtools/entities/ianalyzer.py +163 -0
- idmtools/entities/iplatform.py +1106 -0
- idmtools/entities/iplatform_default.py +39 -0
- idmtools/entities/iplatform_ops/__init__.py +5 -0
- idmtools/entities/iplatform_ops/iplatform_asset_collection_operations.py +148 -0
- idmtools/entities/iplatform_ops/iplatform_experiment_operations.py +415 -0
- idmtools/entities/iplatform_ops/iplatform_simulation_operations.py +315 -0
- idmtools/entities/iplatform_ops/iplatform_suite_operations.py +322 -0
- idmtools/entities/iplatform_ops/iplatform_workflowitem_operations.py +301 -0
- idmtools/entities/iplatform_ops/utils.py +185 -0
- idmtools/entities/itask.py +316 -0
- idmtools/entities/iworkflow_item.py +167 -0
- idmtools/entities/platform_requirements.py +20 -0
- idmtools/entities/relation_type.py +14 -0
- idmtools/entities/simulation.py +255 -0
- idmtools/entities/suite.py +188 -0
- idmtools/entities/task_proxy.py +37 -0
- idmtools/entities/templated_simulation.py +325 -0
- idmtools/frozen/frozen_dict.py +71 -0
- idmtools/frozen/frozen_list.py +66 -0
- idmtools/frozen/frozen_set.py +86 -0
- idmtools/frozen/frozen_tuple.py +18 -0
- idmtools/frozen/frozen_utils.py +179 -0
- idmtools/frozen/ifrozen.py +66 -0
- idmtools/plugins/__init__.py +5 -0
- idmtools/plugins/git_commit.py +117 -0
- idmtools/registry/__init__.py +4 -0
- idmtools/registry/experiment_specification.py +105 -0
- idmtools/registry/functions.py +28 -0
- idmtools/registry/hook_specs.py +132 -0
- idmtools/registry/master_plugin_registry.py +51 -0
- idmtools/registry/platform_specification.py +138 -0
- idmtools/registry/plugin_specification.py +129 -0
- idmtools/registry/task_specification.py +104 -0
- idmtools/registry/utils.py +119 -0
- idmtools/services/__init__.py +5 -0
- idmtools/services/ipersistance_service.py +135 -0
- idmtools/services/platforms.py +13 -0
- idmtools/utils/__init__.py +5 -0
- idmtools/utils/caller.py +24 -0
- idmtools/utils/collections.py +246 -0
- idmtools/utils/command_line.py +45 -0
- idmtools/utils/decorators.py +300 -0
- idmtools/utils/display/__init__.py +22 -0
- idmtools/utils/display/displays.py +181 -0
- idmtools/utils/display/settings.py +25 -0
- idmtools/utils/dropbox_location.py +30 -0
- idmtools/utils/entities.py +127 -0
- idmtools/utils/file.py +72 -0
- idmtools/utils/file_parser.py +151 -0
- idmtools/utils/filter_simulations.py +182 -0
- idmtools/utils/filters/__init__.py +5 -0
- idmtools/utils/filters/asset_filters.py +88 -0
- idmtools/utils/general.py +286 -0
- idmtools/utils/gitrepo.py +336 -0
- idmtools/utils/hashing.py +239 -0
- idmtools/utils/info.py +124 -0
- idmtools/utils/json.py +82 -0
- idmtools/utils/language.py +107 -0
- idmtools/utils/local_os.py +40 -0
- idmtools/utils/time.py +22 -0
- idmtools-0.0.2.dist-info/METADATA +120 -0
- idmtools-0.0.2.dist-info/RECORD +116 -0
- idmtools-0.0.2.dist-info/entry_points.txt +9 -0
- idmtools-0.0.2.dist-info/licenses/LICENSE.TXT +3 -0
- idmtools-0.0.0.dev0.dist-info/METADATA +0 -41
- idmtools-0.0.0.dev0.dist-info/RECORD +0 -5
- {idmtools-0.0.0.dev0.dist-info → idmtools-0.0.2.dist-info}/WHEEL +0 -0
- {idmtools-0.0.0.dev0.dist-info → idmtools-0.0.2.dist-info}/top_level.txt +0 -0
idmtools/__init__.py
CHANGED
@@ -1,8 +1,27 @@
-"""
-
-
-
-
-
-
-
+"""idmtools core package.
+
+This init installs a system exception hook for idmtools.
+It also ensures the configuration is loaded.
+
+Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
+"""
+import sys
+from idmtools.core.exceptions import idmtools_error_handler
+from idmtools.config.idm_config_parser import IdmConfigParser  # noqa: F401
+
+try:
+    from importlib.metadata import version, PackageNotFoundError
+except ImportError:
+    # Python < 3.8
+    from importlib_metadata import version, PackageNotFoundError
+
+try:
+    __version__ = version("idmtools")  # Use your actual package name
+except PackageNotFoundError:
+    # Package not installed, use fallback
+    __version__ = "0.0.0+unknown"
+
+
+# only set exception hook if it has not been overridden
+if sys.excepthook == sys.__excepthook__:
+    sys.excepthook = idmtools_error_handler
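Editor's note: the guard above compares `sys.excepthook` against `sys.__excepthook__`, which always holds the interpreter's original handler, so the custom hook is installed only when nothing else has replaced the default. A minimal, self-contained sketch of the same pattern (the handler name and message below are illustrative, not part of the package):

```python
import sys

def report_uncaught(exc_type, exc_value, exc_traceback):
    # Illustrative handler: print a one-line summary, then delegate to the
    # interpreter's default traceback printer.
    print(f"uncaught: {exc_type.__name__}: {exc_value}", file=sys.stderr)
    sys.__excepthook__(exc_type, exc_value, exc_traceback)

# Only true when no other code has installed a custom hook yet.
if sys.excepthook == sys.__excepthook__:
    sys.excepthook = report_uncaught
```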
idmtools/analysis/add_analyzer.py
ADDED
@@ -0,0 +1,89 @@
+"""idmtools add analyzer.
+
+More of an example.
+
+Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
+"""
+import os
+from idmtools.entities.ianalyzer import IAnalyzer, ANALYZABLE_ITEM
+
+
+class AddAnalyzer(IAnalyzer):
+    """
+    A simple base class to add analyzers.
+
+    Examples:
+        .. literalinclude:: ../../examples/analyzers/example_analysis_AddAnalyzer.py
+    """
+
+    def __init__(self, filenames=None, output_path='output'):
+        """
+        Initialize our analyzer.
+
+        Args:
+            filenames: Filenames to fetch
+            output_path: Path to write output to
+        """
+        super().__init__()
+        self.output_path = output_path
+        self.filenames = filenames or []
+
+        # We only want the raw files -> disable parsing
+        self.parse = True
+
+    def filter(self, item: ANALYZABLE_ITEM) -> bool:
+        """
+        Determine whether the given item should be included by this analyzer.
+
+        This implementation accepts all items unconditionally.
+
+        Args:
+            item (ANALYZABLE_ITEM): The item to be evaluated.
+
+        Returns:
+            bool: Always returns True, meaning all items are accepted.
+        """
+        return True  # download them all!
+
+    def initialize(self):
+        """
+        Initialize our analyzer before running it.
+
+        We use this to create our output directory.
+
+        Returns:
+            None
+        """
+        self.output_path = os.path.join(self.working_dir, self.output_path)
+        os.makedirs(self.output_path, exist_ok=True)
+
+    def map(self, data, item: ANALYZABLE_ITEM):
+        """
+        Run this on each item and the files we retrieve.
+
+        Args:
+            data: Map of filenames -> content
+            item: Item we are mapping
+
+        Returns:
+            Values added up
+        """
+        number = int(list(data.values())[0].split()[10])
+        result = number + 100
+        return result
+
+    # ck4, should we pass objects as the keys? e.g. Item-type, not just their id
+    def reduce(self, data):
+        """
+        Combine all the data we mapped.
+
+        Args:
+            data: Map of results in form Item -> map results
+
+        Returns:
+            Sum of all the results
+        """
+        # data is currently a dict with item_id: value entries
+        value = sum(data.values())
+        print(value)
+        return value
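Editor's note: since this file is explicitly "more of an example", a hedged usage sketch may help. The platform block name, experiment id, and output file path below are hypothetical; `AnalyzeManager`, `ItemType`, and the `Platform` factory come from modules listed in this diff (`analyze_manager.py`, `enums.py`, `platform_factory.py`):

```python
from idmtools.analysis.add_analyzer import AddAnalyzer
from idmtools.analysis.analyze_manager import AnalyzeManager
from idmtools.core.enums import ItemType
from idmtools.core.platform_factory import Platform

if __name__ == '__main__':
    # Hypothetical platform block and experiment id, for illustration only.
    platform = Platform('COMPS')
    manager = AnalyzeManager(
        platform=platform,
        ids=[('00000000-0000-0000-0000-000000000000', ItemType.EXPERIMENT)],
        analyzers=[AddAnalyzer(filenames=['output/result.txt'])])
    # Runs map() on each simulation's files, then reduce() on the mapped values.
    manager.analyze()
```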
idmtools/analysis/analyze_manager.py
ADDED
@@ -0,0 +1,490 @@
+"""idmtools Analyzer manager.
+
+AnalyzerManager is the "driver" of analysis. Analysis is mostly a map reduce operation.
+
+Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
+"""
+import os
+import sys
+import time
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+from logging import getLogger, DEBUG
+from typing import NoReturn, List, Dict, Tuple, Optional, TYPE_CHECKING
+from tqdm import tqdm
+from idmtools import IdmConfigParser
+from idmtools.analysis.map_worker_entry import map_item
+from idmtools.core import NoPlatformException
+from idmtools.core.enums import ItemType
+from idmtools.core.interfaces.ientity import IEntity
+from idmtools.core.logging import VERBOSE, SUCCESS
+from idmtools.entities.ianalyzer import IAnalyzer
+from idmtools.utils.language import on_off, verbose_timedelta
+
+if TYPE_CHECKING:  # pragma: no cover
+    from idmtools.entities.iplatform import IPlatform
+
+logger = getLogger(__name__)
+user_logger = getLogger('user')
+
+
+def pool_worker_initializer(func, analyzers, platform: 'IPlatform') -> NoReturn:
+    """
+    Initialize the pool worker, which allows the process pool to associate the analyzers, cache, and path mapping to the function executed to retrieve data.
+
+    Using an initializer improves performance.
+
+    Args:
+        func: The function that the pool will call.
+        analyzers: The list of all analyzers to run.
+        platform: The platform to communicate with to retrieve files from.
+
+    Returns:
+        None
+    """
+    func.analyzers = analyzers
+    func.platform = platform
+
+
+class AnalyzeManager:
+    """
+    Analyzer Manager Class. This is the main driver of analysis.
+    """
+    ANALYZE_TIMEOUT = 3600 * 8  # Maximum seconds before timing out - set to 8 hours
+    WAIT_TIME = 1.15  # How much time to wait between check if the analysis is done
+    EXCEPTION_KEY = '__EXCEPTION__'
+
+    class TimeOutException(Exception):
+        """
+        TimeOutException is raised when the analysis times out.
+        """
+        pass
+
+    class ItemsNotReady(Exception):
+        """
+        ItemsNotReady is raised when items to be analyzed are still running.
+
+        Notes:
+            TODO - Add doc_link
+        """
+        pass
+
+    def __init__(self, platform: 'IPlatform' = None, configuration: dict = None,
+                 ids: List[Tuple[str, ItemType]] = None,
+                 analyzers: List[IAnalyzer] = None, working_dir: str = None,
+                 partial_analyze_ok: bool = False, max_items: Optional[int] = None, verbose: bool = True,
+                 force_manager_working_directory: bool = False,
+                 exclude_ids: List[str] = None, analyze_failed_items: bool = False,
+                 max_workers: Optional[int] = None, executor_type: str = 'process'):
+        """
+        Initialize the AnalyzeManager.
+
+        Args:
+            platform (IPlatform): Platform
+            configuration (dict, optional): Initial Configuration. Defaults to None.
+            ids (List[Tuple[str, ItemType]], optional): List of (item id, ItemType) pairs. Defaults to None.
+            analyzers (List[IAnalyzer], optional): List of Analyzers. Defaults to None.
+            working_dir (str, optional): The working directory. Defaults to os.getcwd().
+            partial_analyze_ok (bool, optional): Whether partial analysis is ok. When this is True, Experiments in progress or Failed can be analyzed. Defaults to False.
+            max_items (int, optional): Max Items to analyze. Useful when developing and testing an Analyzer. Defaults to None.
+            verbose (bool, optional): Print extra information about analysis. Defaults to True.
+            force_manager_working_directory (bool, optional): Force analyzers to write their output under the manager's working directory. Defaults to False.
+            exclude_ids (List[str], optional): List of leaf item ids to exclude from analysis. Defaults to None.
+            analyze_failed_items (bool, optional): Allows analyzing of failed items. Useful when you are trying to aggregate items that have failed. Defaults to False.
+            max_workers (int, optional): Set the max workers. If not provided, falls back to the configuration item *max_workers*. If max_workers is not set in configuration, defaults to CPU count
+            executor_type (str): Whether to use process or thread pooling. Process pooling is more efficient but threading might be required in some environments
+        """
+        super().__init__()
+        if working_dir is None:
+            working_dir = os.getcwd()
+        if executor_type.lower() in ['process', 'thread']:
+            self.executor_type = executor_type.lower()
+        else:
+            raise ValueError(f'{executor_type} is not a valid type for executor_type. Choose either "process" or "thread"')
+
+        self.configuration = configuration or {}
+
+        # load platform from context or from passed in value
+        self.platform = platform
+        self.__check_for_platform_from_context(platform)
+        if max_workers is None:
+            # check for max workers on platform, then in common
+            if self.platform and hasattr(self.platform, '_config_block') and IdmConfigParser.get_option(self.platform._config_block, "max_workers", None):
+                self.configuration['max_workers'] = int(IdmConfigParser.get_option(self.platform._config_block, "max_workers", None))
+            elif IdmConfigParser().get_option('COMMON', 'max_workers', None):
+                self.configuration['max_workers'] = int(IdmConfigParser().get_option('COMMON', 'max_workers'))
+
+        # validate max_workers
+        if max_workers is not None and max_workers < 1:
+            raise ValueError("max_workers must be greater or equal to one")
+        # ensure max workers is int
+        self.max_processes = max_workers if max_workers is not None else self.configuration.get('max_workers', os.cpu_count())
+        if logger.isEnabledFor(DEBUG):
+            logger.debug(f'AnalyzeManager set to {self.max_processes}')
+
+        # Should we continue analyzing even when we encounter an error?
+        self.continue_on_error = False
+
+        # should we attempt to analyze failed items
+        self.analyze_failed_items = analyze_failed_items
+
+        # analyze at most this many items, regardless of how many have been given
+        self.max_items_to_analyze = max_items
+
+        # allows analysis to be performed even if some items are not ready for analysis
+        self.partial_analyze_ok = partial_analyze_ok or (self.max_items_to_analyze is not None)
+
+        # Each analyzer's results will be in the working_dir directory if not specified by them directly.
+        # force_wd overrides this by forcing all results to be in working_dir.
+        self.working_dir = working_dir
+        self.force_wd = force_manager_working_directory
+
+        # Take the provided ids and determine the full set of unique root items (e.g. simulations) in them to analyze
+        logger.debug("Load information about items from platform")
+        ids = list(set(ids or list()))  # uniquify
+        items: List[IEntity] = []
+        for oid, otype in ids:
+            logger.debug(f'Getting metadata for {oid} and {otype}')
+            item = self.platform.get_item(oid, otype, force=True, raw=True)
+            item.uid = str(item.id)
+            items.append(item)
+        self.potential_items: List[IEntity] = []
+
+        for i in items:
+            logger.debug(f'Flattening items for {i.uid}')
+            self.potential_items.extend(self.platform.flatten_item(item=i, raw=True))
+
+        # These are leaf items to be ignored in analysis. Prune them from analysis.
+        self.exclude_ids = exclude_ids or []
+        for index, oid in enumerate(self.exclude_ids):
+            self.exclude_ids[index] = str(oid)
+        self.potential_items = [item for item in self.potential_items if item.uid not in self.exclude_ids]
+        for item in self.potential_items:
+            item.platform = self.platform
+
+        logger.debug(f"Potential items to analyze: {len(self.potential_items)}")
+
+        self._items = dict()  # filled in later by _get_items_to_analyze
+
+        self.analyzers = analyzers or list()
+        self.verbose = verbose
+
+    def __check_for_platform_from_context(self, platform) -> 'IPlatform':  # noqa: F821
+        """
+        Try to determine platform of current object from self or current platform.
+
+        Args:
+            platform: Passed in platform object
+
+        Raises:
+            NoPlatformException: when no platform is on current context
+        Returns:
+            Platform object
+        """
+        if self.platform is None:
+            # check context for current platform
+            if platform is None:
+                from idmtools.core.context import CURRENT_PLATFORM
+                if CURRENT_PLATFORM is None:
+                    raise NoPlatformException("No Platform defined on object, in current context, or passed to run")
+                platform = CURRENT_PLATFORM
+            self.platform = platform
+        return self.platform
+
+    def add_item(self, item: IEntity) -> NoReturn:
+        """
+        Add an additional item for analysis.
+
+        Args:
+            item: The new item to add for analysis.
+
+        Returns:
+            None
+        """
+        self.potential_items.extend(self.platform.flatten_item(item=item, raw=True))
+
+    def _get_items_to_analyze(self) -> Dict[str, IEntity]:
+        """
+        Get a list of items derived from :meth:`self._items` that are available to analyze.
+
+        Returns:
+            A list of :class:`~idmtools.entities.iitem.IItem` objects.
+
+        """
+        # First sort items by whether they can currently be analyzed
+        can_analyze = {}
+        cannot_analyze = {}
+        for item in self.potential_items:
+            valid = self.platform.validate_item_for_analysis(item, self.analyze_failed_items)
+            if valid:
+                can_analyze[item.uid] = item
+            else:
+                cannot_analyze[item.uid] = item
+
+        # now consider item limiting arguments
+        if self.partial_analyze_ok:
+            if self.max_items_to_analyze is not None:
+                return {item.uid: item for item in list(can_analyze.values())[0:self.max_items_to_analyze]}
+            return can_analyze
+
+        if len(cannot_analyze) > 0:
+            raise self.ItemsNotReady('There are %d items that cannot be analyzed and partial_analyze_ok is off.' %
+                                     len(cannot_analyze))
+
+        return can_analyze
+
+    def add_analyzer(self, analyzer: IAnalyzer) -> NoReturn:
+        """
+        Add another analyzer to use on the items to be analyzed.
+
+        Args:
+            analyzer: An analyzer object (:class:`~idmtools.entities.ianalyzer.IAnalyzer`).
+
+        Returns:
+            None
+        """
+        self.analyzers.append(analyzer)
+
+    def _update_analyzer_uids(self) -> NoReturn:
+        """
+        Ensure that each analyzer has a unique ID in this context by updating them as needed.
+
+        Returns:
+            None
+        """
+        unique_uids = {analyzer.uid for analyzer in self.analyzers}
+        if len(unique_uids) < len(self.analyzers):
+            for i in range(len(self.analyzers)):
+                self.analyzers[i].uid += f'-{i}'
+                logger.debug(f'Analyzer {self.analyzers[i].__class__} id set to {self.analyzers[i].uid}')
+
+    def _initialize_analyzers(self) -> NoReturn:
+        """
+        Do the steps needed to prepare analyzers for item analysis.
+
+        Returns:
+            None
+        """
+        logger.debug("Initializing Analyzers")
+        # Setup the working directory and call initialize() on each analyzer
+        for analyzer in self.analyzers:
+            if self.force_wd:
+                analyzer.working_dir = self.working_dir
+            else:
+                analyzer.working_dir = analyzer.working_dir or self.working_dir
+
+            if logger.isEnabledFor(DEBUG):
+                logger.debug(f"Analyzer working directory set to {analyzer.working_dir}")
+            analyzer.initialize()
+
+        # make sure each analyzer in self.analyzers has a unique uid
+        self._update_analyzer_uids()
+
+    def _print_configuration(self, n_items: int, n_processes: int) -> NoReturn:
+        """
+        Display some information about an ongoing analysis.
+
+        Args:
+            n_items: The number of items being analyzed.
+            n_processes: The number of active item processing handlers.
+
+        Returns:
+            None
+        """
+        n_ignored_items = len(self.potential_items) - n_items
+        user_logger.log(VERBOSE, 'Analyze Manager')
+        user_logger.log(VERBOSE, f' | {n_items} item(s) selected for analysis')
+        user_logger.log(VERBOSE, f' | partial_analyze_ok is {self.partial_analyze_ok}, max_items is '
+                                 f'{self.max_items_to_analyze}, and {n_ignored_items} item(s) are being ignored')
+        user_logger.log(VERBOSE, ' | Analyzer(s): ')
+        for analyzer in self.analyzers:
+            user_logger.log(VERBOSE, f' | - {analyzer.uid} File parsing: {on_off(analyzer.parse)} / Use '
+                                     f'cache: {on_off(hasattr(analyzer, "cache"))}')
+            if hasattr(analyzer, 'need_dir_map'):
+                user_logger.log(VERBOSE, f' | (Directory map: {on_off(analyzer.need_dir_map)})')
+        user_logger.log(VERBOSE, f' | Pool of {n_processes} analyzing {self.executor_type}(es)')
+
+    def _run_and_wait_for_mapping(self, executor) -> Tuple[Dict, bool]:
+        """
+        Run and manage the mapping call on each item.
+
+        Args:
+            executor: A pool of workers.
+
+        Returns:
+            A tuple of (map results, status); status is False if an exception occurred processing **map** on any item, otherwise True.
+
+        """
+        # add items to process (map)
+        n_items = len(self._items)
+        logger.debug(f"Number of items for analysis: {n_items}")
+        logger.debug("Mapping the items for analysis")
+        futures = dict()
+        results = dict()
+        status = True
+        # create status bar and then queue our futures
+        with tqdm(total=len(self._items)) as progress:
+            for i in self._items.values():
+                future = executor.submit(map_item, i)
+                future.add_done_callback(lambda p: progress.update())
+                futures[future] = i
+
+            # wait on our futures to complete, catch exceptions, and aggregate results
+            for future in as_completed(futures.keys()):
+                if future.exception():
+                    status = False
+                    ex = future.exception()
+                    user_logger.error(ex)
+                    if not self.continue_on_error:
+                        raise ex
+                else:
+                    results[futures[future]] = future.result()
+
+        logger.debug(f"Result fetching status: {status}")
+        return results, status
+
+    def _run_and_wait_for_reducing(self, executor, results) -> dict:
+        """
+        Run and manage the reduce call on the combined item results (by analyzer).
+
+        Args:
+            executor: A pool of workers.
+
+        Returns:
+            An analyzer ID keyed dictionary of finalize results.
+
+        """
+        # the keys in self.cache from map() calls are expected to be item ids. Each keyed value
+        # contains analyzer_id: item_results_for_analyzer entries.
+        logger.debug("Running reduce on results")
+        futures = {}
+        finalize_results = {}
+        # create a progress bar
+        with tqdm(total=len(self.analyzers), desc="Running Analyzer Reduces") as progress:
+            # for each analyzer, queue our futures
+            for analyzer in self.analyzers:
+                logger.debug(f"Gather data for {analyzer.uid}")
+                item_data_for_analyzer = {}
+                for item, data in results.items():
+                    if analyzer.uid in data:
+                        item_data_for_analyzer[item] = data[analyzer.uid]
+                if len(item_data_for_analyzer) == 0:
+                    user_logger.warning(f"Note: {analyzer.uid} has no simulation data to analyze. Please verify the filter or map function of the analyzer.")
+                future = executor.submit(analyzer.reduce, item_data_for_analyzer)
+                future.add_done_callback(lambda p: progress.update())
+
+                logger.debug(f"Queueing {analyzer.uid}")
+                futures[future] = analyzer.uid
+
+            # wait on our futures, catch exceptions, and aggregate results
+            logger.debug("Waiting for results")
+            for future in as_completed(futures.keys()):
+                if future.exception():
+                    user_logger.error(f'Reduce for Analyzer {futures[future]} failed')
+                    user_logger.exception(future.exception())
+                    user_logger.error("See log for details")
+                    if not self.continue_on_error:
+                        sys.exit(-1)
+                else:
+                    finalize_results[futures[future]] = future.result()
+        if logger.isEnabledFor(DEBUG):
+            logger.debug("Finished reducing results")
+        for future in futures.keys():
+            future.cancel()
+        return finalize_results
+
+    def analyze(self) -> bool:
+        """
+        Process the provided items with the provided analyzers. This is the main driver method of :class:`AnalyzeManager`.
+
+        Analysis runs as a map step over each item followed by a reduce
+        step per analyzer.
+
+        Returns:
+            True on success; False on failure/exception.
+        """
+        start_time = time.time()
+
+        # If no analyzers or simulations have been provided, there is nothing to do
+
+        if len(self.analyzers) == 0:
+            user_logger.error('No analyzers were provided; cannot run analysis.')
+            return False
+        self._initialize_analyzers()
+
+        if len(self.potential_items) == 0:
+            user_logger.error('No items were provided; cannot run analysis.')
+            return False
+        # trim processing to those items that are ready and match requested limits
+        self._items: Dict[str, IEntity] = self._get_items_to_analyze()
+
+        if len(self._items) == 0:
+            user_logger.error('No items are ready; cannot run analysis.')
+            return False
+
+        # initialize mapping results cache/storage
+        n_items = len(self._items)
+        n_processes = min(self.max_processes, max(n_items, 1))
+
+        logger.info(f'Analyzing {n_items} items')
+
+        # do any platform-specific initializations
+        logger.debug("Triggering per group functions")
+        for analyzer in self.analyzers:
+            analyzer.per_group(items=self._items)
+
+        if self.verbose:
+            self._print_configuration(n_items, n_processes)
+
+        no_print_config_exists = False
+        # Before we initialize processes, ensure config warnings are suppressed
+        if 'IDMTOOLS_NO_PRINT_CONFIG_USED' not in os.environ:
+            os.environ['IDMTOOLS_NO_PRINT_CONFIG_USED'] = "1"
+            os.environ['IDMTOOLS_HIDE_DEV_WARNING'] = "1"
+            os.environ['IDMTOOLS_NO_CONFIG_WARNING'] = "1"
+        else:
+            no_print_config_exists = True
+
+        # create worker pool
+        try:
+            # To ensure subprocesses reuse same config file, pass it through environment vars
+            config_file = IdmConfigParser().get_config_path()
+            if config_file:
+                os.environ['IDMTOOLS_CONFIG_FILE'] = config_file
+
+            # our options for our executor
+            opts = dict(max_workers=n_processes, initializer=pool_worker_initializer, initargs=(map_item, self.analyzers, self.platform))
+            # determine executor type. In most cases we want processes, but sometimes (like in Jupyter notebooks) we want to use threads
+            if self.executor_type == 'process':
+                executor = ProcessPoolExecutor(**opts)
+            else:
+                executor = ThreadPoolExecutor(**opts)
+
+            map_results, status = self._run_and_wait_for_mapping(executor)
+            finalize_results = self._run_and_wait_for_reducing(executor, map_results)
+
+        finally:
+            # because of debug mode, we have to leave executor and let python handle the shutdown through del
+            # see https://youtrack.jetbrains.com/issue/PY-34432
+            os.environ['NO_LOGGING_INIT'] = 'n'
+            logger.debug("Shutting down workers")
+
+        for analyzer in self.analyzers:
+            analyzer.results = finalize_results[analyzer.uid]
+
+        logger.debug("Destroying analyzers")
+        for analyzer in self.analyzers:
+            analyzer.destroy()
+
+        if not no_print_config_exists:
+            del os.environ['IDMTOOLS_NO_PRINT_CONFIG_USED']
+            del os.environ['IDMTOOLS_HIDE_DEV_WARNING']
+            del os.environ['IDMTOOLS_NO_CONFIG_WARNING']
+        if 'IDMTOOLS_CONFIG_FILE' in os.environ:
+            del os.environ['IDMTOOLS_CONFIG_FILE']
+
+        if self.verbose:
+            total_time = time.time() - start_time
+            time_str = verbose_timedelta(total_time)
+            user_logger.log(SUCCESS, '\r | Analysis complete. Took {} '
+                            '(~ {:.3f} per item)'.format(time_str, total_time / n_items))
+        return True
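Editor's note: `pool_worker_initializer` above attaches the analyzer list and platform to the mapped function once per worker, instead of pickling them with every submitted task. A self-contained sketch of that same initializer pattern, with illustrative names that are not part of the package's API:

```python
from concurrent.futures import ProcessPoolExecutor, as_completed

def map_one(item):
    # Analog of map_item's func.analyzers: this attribute was attached to the
    # module-level function by the initializer running in each worker process.
    return {name: f"{name} processed {item}" for name in map_one.analyzers}

def init_worker(func, analyzers):
    # Runs once per worker process; mirrors pool_worker_initializer above.
    func.analyzers = analyzers

if __name__ == '__main__':
    with ProcessPoolExecutor(max_workers=2, initializer=init_worker,
                             initargs=(map_one, ['sum', 'csv'])) as executor:
        futures = [executor.submit(map_one, i) for i in range(4)]
        for future in as_completed(futures):
            print(future.result())
```

Because `map_one` is a module-level function, each worker unpickles it by reference and sees the attributes its own initializer set, which is what lets `map_item` find the analyzers without per-task transfer.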