idmtools 0.0.0.dev0__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. idmtools/__init__.py +27 -8
  2. idmtools/analysis/__init__.py +5 -0
  3. idmtools/analysis/add_analyzer.py +89 -0
  4. idmtools/analysis/analyze_manager.py +490 -0
  5. idmtools/analysis/csv_analyzer.py +103 -0
  6. idmtools/analysis/download_analyzer.py +96 -0
  7. idmtools/analysis/map_worker_entry.py +100 -0
  8. idmtools/analysis/platform_analysis_bootstrap.py +94 -0
  9. idmtools/analysis/platform_anaylsis.py +291 -0
  10. idmtools/analysis/tags_analyzer.py +93 -0
  11. idmtools/assets/__init__.py +9 -0
  12. idmtools/assets/asset.py +453 -0
  13. idmtools/assets/asset_collection.py +514 -0
  14. idmtools/assets/content_handlers.py +19 -0
  15. idmtools/assets/errors.py +23 -0
  16. idmtools/assets/file_list.py +191 -0
  17. idmtools/builders/__init__.py +11 -0
  18. idmtools/builders/arm_simulation_builder.py +152 -0
  19. idmtools/builders/csv_simulation_builder.py +76 -0
  20. idmtools/builders/simulation_builder.py +348 -0
  21. idmtools/builders/sweep_arm.py +109 -0
  22. idmtools/builders/yaml_simulation_builder.py +82 -0
  23. idmtools/config/__init__.py +7 -0
  24. idmtools/config/idm_config_parser.py +486 -0
  25. idmtools/core/__init__.py +10 -0
  26. idmtools/core/cache_enabled.py +114 -0
  27. idmtools/core/context.py +68 -0
  28. idmtools/core/docker_task.py +207 -0
  29. idmtools/core/enums.py +51 -0
  30. idmtools/core/exceptions.py +91 -0
  31. idmtools/core/experiment_factory.py +71 -0
  32. idmtools/core/id_file.py +70 -0
  33. idmtools/core/interfaces/__init__.py +5 -0
  34. idmtools/core/interfaces/entity_container.py +64 -0
  35. idmtools/core/interfaces/iassets_enabled.py +58 -0
  36. idmtools/core/interfaces/ientity.py +331 -0
  37. idmtools/core/interfaces/iitem.py +206 -0
  38. idmtools/core/interfaces/imetadata_operations.py +89 -0
  39. idmtools/core/interfaces/inamed_entity.py +17 -0
  40. idmtools/core/interfaces/irunnable_entity.py +159 -0
  41. idmtools/core/logging.py +387 -0
  42. idmtools/core/platform_factory.py +316 -0
  43. idmtools/core/system_information.py +104 -0
  44. idmtools/core/task_factory.py +145 -0
  45. idmtools/entities/__init__.py +10 -0
  46. idmtools/entities/command_line.py +229 -0
  47. idmtools/entities/command_task.py +155 -0
  48. idmtools/entities/experiment.py +787 -0
  49. idmtools/entities/generic_workitem.py +43 -0
  50. idmtools/entities/ianalyzer.py +163 -0
  51. idmtools/entities/iplatform.py +1106 -0
  52. idmtools/entities/iplatform_default.py +39 -0
  53. idmtools/entities/iplatform_ops/__init__.py +5 -0
  54. idmtools/entities/iplatform_ops/iplatform_asset_collection_operations.py +148 -0
  55. idmtools/entities/iplatform_ops/iplatform_experiment_operations.py +415 -0
  56. idmtools/entities/iplatform_ops/iplatform_simulation_operations.py +315 -0
  57. idmtools/entities/iplatform_ops/iplatform_suite_operations.py +322 -0
  58. idmtools/entities/iplatform_ops/iplatform_workflowitem_operations.py +301 -0
  59. idmtools/entities/iplatform_ops/utils.py +185 -0
  60. idmtools/entities/itask.py +316 -0
  61. idmtools/entities/iworkflow_item.py +167 -0
  62. idmtools/entities/platform_requirements.py +20 -0
  63. idmtools/entities/relation_type.py +14 -0
  64. idmtools/entities/simulation.py +255 -0
  65. idmtools/entities/suite.py +188 -0
  66. idmtools/entities/task_proxy.py +37 -0
  67. idmtools/entities/templated_simulation.py +325 -0
  68. idmtools/frozen/frozen_dict.py +71 -0
  69. idmtools/frozen/frozen_list.py +66 -0
  70. idmtools/frozen/frozen_set.py +86 -0
  71. idmtools/frozen/frozen_tuple.py +18 -0
  72. idmtools/frozen/frozen_utils.py +179 -0
  73. idmtools/frozen/ifrozen.py +66 -0
  74. idmtools/plugins/__init__.py +5 -0
  75. idmtools/plugins/git_commit.py +117 -0
  76. idmtools/registry/__init__.py +4 -0
  77. idmtools/registry/experiment_specification.py +105 -0
  78. idmtools/registry/functions.py +28 -0
  79. idmtools/registry/hook_specs.py +132 -0
  80. idmtools/registry/master_plugin_registry.py +51 -0
  81. idmtools/registry/platform_specification.py +138 -0
  82. idmtools/registry/plugin_specification.py +129 -0
  83. idmtools/registry/task_specification.py +104 -0
  84. idmtools/registry/utils.py +119 -0
  85. idmtools/services/__init__.py +5 -0
  86. idmtools/services/ipersistance_service.py +135 -0
  87. idmtools/services/platforms.py +13 -0
  88. idmtools/utils/__init__.py +5 -0
  89. idmtools/utils/caller.py +24 -0
  90. idmtools/utils/collections.py +246 -0
  91. idmtools/utils/command_line.py +45 -0
  92. idmtools/utils/decorators.py +300 -0
  93. idmtools/utils/display/__init__.py +22 -0
  94. idmtools/utils/display/displays.py +181 -0
  95. idmtools/utils/display/settings.py +25 -0
  96. idmtools/utils/dropbox_location.py +30 -0
  97. idmtools/utils/entities.py +127 -0
  98. idmtools/utils/file.py +72 -0
  99. idmtools/utils/file_parser.py +151 -0
  100. idmtools/utils/filter_simulations.py +182 -0
  101. idmtools/utils/filters/__init__.py +5 -0
  102. idmtools/utils/filters/asset_filters.py +88 -0
  103. idmtools/utils/general.py +286 -0
  104. idmtools/utils/gitrepo.py +336 -0
  105. idmtools/utils/hashing.py +239 -0
  106. idmtools/utils/info.py +124 -0
  107. idmtools/utils/json.py +82 -0
  108. idmtools/utils/language.py +107 -0
  109. idmtools/utils/local_os.py +40 -0
  110. idmtools/utils/time.py +22 -0
  111. idmtools-0.0.2.dist-info/METADATA +120 -0
  112. idmtools-0.0.2.dist-info/RECORD +116 -0
  113. idmtools-0.0.2.dist-info/entry_points.txt +9 -0
  114. idmtools-0.0.2.dist-info/licenses/LICENSE.TXT +3 -0
  115. idmtools-0.0.0.dev0.dist-info/METADATA +0 -41
  116. idmtools-0.0.0.dev0.dist-info/RECORD +0 -5
  117. {idmtools-0.0.0.dev0.dist-info → idmtools-0.0.2.dist-info}/WHEEL +0 -0
  118. {idmtools-0.0.0.dev0.dist-info → idmtools-0.0.2.dist-info}/top_level.txt +0 -0
idmtools/__init__.py CHANGED
@@ -1,8 +1,27 @@
1
- """
2
- idmtools - Placeholder Package
3
-
4
- This is a placeholder package to reserve the name on PyPI.
5
- The actual package will be published later.
6
- """
7
-
8
- __version__ = "0.0.0.dev0"
1
+ """idmtools core package.
2
+
3
+ This init installs a system exception hook for idmtools.
4
+ It also ensures the configuration is loaded.
5
+
6
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
7
+ """
8
+ import sys
9
+ from idmtools.core.exceptions import idmtools_error_handler
10
+ from idmtools.config.idm_config_parser import IdmConfigParser # noqa: F401
11
+
12
+ try:
13
+ from importlib.metadata import version, PackageNotFoundError
14
+ except ImportError:
15
+ # Python < 3.8
16
+ from importlib_metadata import version, PackageNotFoundError
17
+
18
+ try:
19
+ __version__ = version("idmtools") # Use your actual package name
20
+ except PackageNotFoundError:
21
+ # Package not installed, use fallback
22
+ __version__ = "0.0.0+unknown"
23
+
24
+
25
+ # only set exception hook if it has not been overridden
26
+ if sys.excepthook == sys.__excepthook__:
27
+ sys.excepthook = idmtools_error_handler
@@ -0,0 +1,5 @@
1
+ """idmtools analyzer framework
2
+
3
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
4
+ """
5
+ # flake8: noqa F821
@@ -0,0 +1,89 @@
1
+ """idmtools add analyzer.
2
+
3
+ More of an example.
4
+
5
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
6
+ """
7
+ import os
8
+ from idmtools.entities.ianalyzer import IAnalyzer, ANALYZABLE_ITEM
9
+
10
+
11
+ class AddAnalyzer(IAnalyzer):
12
+ """
13
+ A simple base class to add analyzers.
14
+
15
+ Examples:
16
+ .. literalinclude:: ../../examples/analyzers/example_analysis_AddAnalyzer.py
17
+ """
18
+
19
+ def __init__(self, filenames=None, output_path='output'):
20
+ """
21
+ Initialize our analyzer.
22
+
23
+ Args:
24
+ filenames: Filename to fetch
25
+ output_path: Path to write output to
26
+ """
27
+ super().__init__()
28
+ self.output_path = output_path
29
+ self.filenames = filenames or []
30
+
31
+ # We only want the raw files -> disable parsing
32
+ self.parse = True
33
+
34
+ def filter(self, item: ANALYZABLE_ITEM) -> bool:
35
+ """
36
+ Determine whether the given item should be included by this analyzer.
37
+
38
+ This implementation accepts all items unconditionally.
39
+
40
+ Args:
41
+ item (ANALYZABLE_ITEM): The item to be evaluated.
42
+
43
+ Returns:
44
+ bool: Always returns True, meaning all items are accepted.
45
+ """
46
+ return True # download them all!
47
+
48
+ def initialize(self):
49
+ """
50
+ Initialize our analyzer before running it.
51
+
52
+ We use this to create our output directory.
53
+
54
+ Returns:
55
+ None
56
+ """
57
+ self.output_path = os.path.join(self.working_dir, self.output_path)
58
+ os.makedirs(self.output_path, exist_ok=True)
59
+
60
+ def map(self, data, item: ANALYZABLE_ITEM):
61
+ """
62
+ Run this on each item and the files we retrieve.
63
+
64
+ Args:
65
+ data: Map of filesnames -> content
66
+ item: Item we are mapping
67
+
68
+ Returns:
69
+ Values added up
70
+ """
71
+ number = int(list(data.values())[0].split()[10])
72
+ result = number + 100
73
+ return result
74
+
75
+ # ck4, should we pass objects as the keys? e.g. Item-type, not just their id
76
+ def reduce(self, data):
77
+ """
78
+ Combine all the data we mapped.
79
+
80
+ Args:
81
+ data: Map of results in form Item -> map results
82
+
83
+ Returns:
84
+ Sum of all the results
85
+ """
86
+ # data is currently a dict with item_id: value entries
87
+ value = sum(data.values())
88
+ print(value)
89
+ return value
@@ -0,0 +1,490 @@
1
+ """idmtools Analyzer manager.
2
+
3
+ AnalyzerManager is the "driver" of analysis. Analysis is mostly a map reduce operation.
4
+
5
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
6
+ """
7
+ import os
8
+ import sys
9
+ import time
10
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
11
+ from logging import getLogger, DEBUG
12
+ from typing import NoReturn, List, Dict, Tuple, Optional, TYPE_CHECKING
13
+ from tqdm import tqdm
14
+ from idmtools import IdmConfigParser
15
+ from idmtools.analysis.map_worker_entry import map_item
16
+ from idmtools.core import NoPlatformException
17
+ from idmtools.core.enums import ItemType
18
+ from idmtools.core.interfaces.ientity import IEntity
19
+ from idmtools.core.logging import VERBOSE, SUCCESS
20
+ from idmtools.entities.ianalyzer import IAnalyzer
21
+ from idmtools.utils.language import on_off, verbose_timedelta
22
+
23
+ if TYPE_CHECKING: # pragma: no cover
24
+ from idmtools.entities.iplatform import IPlatform
25
+
26
+ logger = getLogger(__name__)
27
+ user_logger = getLogger('user')
28
+
29
+
30
+ def pool_worker_initializer(func, analyzers, platform: 'IPlatform') -> NoReturn:
31
+ """
32
+ Initialize the pool worker, which allows the process pool to associate the analyzers, cache, and path mapping to the function executed to retrieve data.
33
+
34
+ Using an initializer improves performance.
35
+
36
+ Args:
37
+ func: The function that the pool will call.
38
+ analyzers: The list of all analyzers to run.
39
+ platform: The platform to communicate with to retrieve files from.
40
+
41
+ Returns:
42
+ None
43
+ """
44
+ func.analyzers = analyzers
45
+ func.platform = platform
46
+
47
+
48
+ class AnalyzeManager:
49
+ """
50
+ Analyzer Manager Class. This is the main driver of analysis.
51
+ """
52
+ ANALYZE_TIMEOUT = 3600 * 8 # Maximum seconds before timing out - set to 8 hours
53
+ WAIT_TIME = 1.15 # How much time to wait between check if the analysis is done
54
+ EXCEPTION_KEY = '__EXCEPTION__'
55
+
56
+ class TimeOutException(Exception):
57
+ """
58
+ TimeOutException is raised when the analysis times out.
59
+ """
60
+ pass
61
+
62
+ class ItemsNotReady(Exception):
63
+ """
64
+ ItemsNotReady is raised when items to be analyzed are still running.
65
+
66
+ Notes:
67
+ TODO - Add doc_link
68
+ """
69
+ pass
70
+
71
+ def __init__(self, platform: 'IPlatform' = None, configuration: dict = None,
72
+ ids: List[Tuple[str, ItemType]] = None,
73
+ analyzers: List[IAnalyzer] = None, working_dir: str = None,
74
+ partial_analyze_ok: bool = False, max_items: Optional[int] = None, verbose: bool = True,
75
+ force_manager_working_directory: bool = False,
76
+ exclude_ids: List[str] = None, analyze_failed_items: bool = False,
77
+ max_workers: Optional[int] = None, executor_type: str = 'process'):
78
+ """
79
+ Initialize the AnalyzeManager.
80
+
81
+ Args:
82
+ platform (IPlatform): Platform
83
+ configuration (dict, optional): Initial Configuration. Defaults to None.
84
+ ids (Tuple[str, ItemType], optional): List of ids as pair of Tuple and ItemType. Defaults to None.
85
+ analyzers (List[IAnalyzer], optional): List of Analyzers. Defaults to None.
86
+ working_dir (str, optional): The working directory. Defaults to os.getcwd().
87
+ partial_analyze_ok (bool, optional): Whether partial analysis is ok. When this is True, Experiments in progress or Failed can be analyzed. Defaults to False.
88
+ max_items (int, optional): Max Items to analyze. Useful when developing and testing an Analyzer. Defaults to None.
89
+ verbose (bool, optional): Print extra information about analysis. Defaults to True.
90
+ force_manager_working_directory (bool, optional): [description]. Defaults to False.
91
+ exclude_ids (List[str], optional): [description]. Defaults to None.
92
+ analyze_failed_items (bool, optional): Allows analyzing of failed items. Useful when you are trying to aggregate items that have failed. Defaults to False.
93
+ max_workers (int, optional): Set the max workers. If not provided, falls back to the configuration item *max_threads*. If max_workers is not set in configuration, defaults to CPU count
94
+ executor_type: (str): Whether to use process or thread pooling. Process pooling is more efficient but threading might be required in some environments
95
+ """
96
+ super().__init__()
97
+ if working_dir is None:
98
+ working_dir = os.getcwd()
99
+ if executor_type.lower() in ['process', 'thread']:
100
+ self.executor_type = executor_type.lower()
101
+ else:
102
+ raise ValueError(f'{executor_type} is not a valid type for executor_type. Choose either "process" or "thread"')
103
+
104
+ self.configuration = configuration or {}
105
+
106
+ # load platform from context or from passed in value
107
+ self.platform = platform
108
+ self.__check_for_platform_from_context(platform)
109
+ if max_workers is None:
110
+ # check for max workers on platform, then in common
111
+ if self.platform and hasattr(self.platform, '_config_block') and IdmConfigParser.get_option(self.platform._config_block, "max_workers", None):
112
+ self.configuration['max_workers'] = int(IdmConfigParser.get_option(self.platform._config_block, "max_workers", None))
113
+ elif IdmConfigParser().get_option('COMMON', 'max_workers', None):
114
+ self.configuration['max_workers'] = int(IdmConfigParser().get_option('COMMON', 'max_workers'))
115
+
116
+ # validate max_workers
117
+ if max_workers is not None and max_workers < 1:
118
+ raise ValueError("max_workers must be greater or equal to one")
119
+ # ensure max workers is int
120
+ self.max_processes = max_workers if max_workers is not None else self.configuration.get('max_workers', os.cpu_count())
121
+ if logger.isEnabledFor(DEBUG):
122
+ logger.debug(f'AnalyzeManager set to {self.max_processes}')
123
+
124
+ # Should we continue analyzing even when we encounter an error?
125
+ self.continue_on_error = False
126
+
127
+ # should we attempt to analyze failed items
128
+ self.analyze_failed_items = analyze_failed_items
129
+
130
+ # analyze at most this many items, regardless of how many have been given
131
+ self.max_items_to_analyze = max_items
132
+
133
+ # allows analysis to be performed even if some items are not ready for analysis
134
+ self.partial_analyze_ok = partial_analyze_ok or (self.max_items_to_analyze is not None)
135
+
136
+ # Each analyzers results will be in the working_dir directory if not specified by them directly.
137
+ # force_wd overrides this by forcing all results to be in working_dir .
138
+ self.working_dir = working_dir
139
+ self.force_wd = force_manager_working_directory
140
+
141
+ # Take the provided ids and determine the full set of unique root items (e.g. simulations) in them to analyze
142
+ logger.debug("Load information about items from platform")
143
+ ids = list(set(ids or list())) # uniquify
144
+ items: List[IEntity] = []
145
+ for oid, otype in ids:
146
+ logger.debug(f'Getting metadata for {oid} and {otype}')
147
+ item = self.platform.get_item(oid, otype, force=True, raw=True)
148
+ item.uid = str(item.id)
149
+ items.append(item)
150
+ self.potential_items: List[IEntity] = []
151
+
152
+ for i in items:
153
+ logger.debug(f'Flattening items for {i.uid}')
154
+ self.potential_items.extend(self.platform.flatten_item(item=i, raw=True))
155
+
156
+ # These are leaf items to be ignored in analysis. Prune them from analysis.
157
+ self.exclude_ids = exclude_ids or []
158
+ for index, oid in enumerate(self.exclude_ids):
159
+ self.exclude_ids[index] = str(oid)
160
+ self.potential_items = [item for item in self.potential_items if item.uid not in self.exclude_ids]
161
+ for item in self.potential_items:
162
+ item.platform = self.platform
163
+
164
+ logger.debug(f"Potential items to analyze: {len(self.potential_items)}")
165
+
166
+ self._items = dict() # filled in later by _get_items_to_analyze
167
+
168
+ self.analyzers = analyzers or list()
169
+ self.verbose = verbose
170
+
171
+ def __check_for_platform_from_context(self, platform) -> 'IPlatform': # noqa: F821
172
+ """
173
+ Try to determine platform of current object from self or current platform.
174
+
175
+ Args:
176
+ platform: Passed in platform object
177
+
178
+ Raises:
179
+ NoPlatformException: when no platform is on current context
180
+ Returns:
181
+ Platform object
182
+ """
183
+ if self.platform is None:
184
+ # check context for current platform
185
+ if platform is None:
186
+ from idmtools.core.context import CURRENT_PLATFORM
187
+ if CURRENT_PLATFORM is None:
188
+ raise NoPlatformException("No Platform defined on object, in current context, or passed to run")
189
+ platform = CURRENT_PLATFORM
190
+ self.platform = platform
191
+ return self.platform
192
+
193
+ def add_item(self, item: IEntity) -> NoReturn:
194
+ """
195
+ Add an additional item for analysis.
196
+
197
+ Args:
198
+ item: The new item to add for analysis.
199
+
200
+ Returns:
201
+ None
202
+ """
203
+ self.potential_items.extend(self.platform.flatten_item(item=item, raw=True))
204
+
205
+ def _get_items_to_analyze(self) -> Dict[str, IEntity]:
206
+ """
207
+ Get a list of items derived from :meth:`self._items` that are available to analyze.
208
+
209
+ Returns:
210
+ A list of :class:`~idmtools.entities.iitem.IItem` objects.
211
+
212
+ """
213
+ # First sort items by whether they can currently be analyzed
214
+ can_analyze = {}
215
+ cannot_analyze = {}
216
+ for item in self.potential_items:
217
+ valid = self.platform.validate_item_for_analysis(item, self.analyze_failed_items)
218
+ if valid:
219
+ can_analyze[item.uid] = item
220
+ else:
221
+ cannot_analyze[item.uid] = item
222
+
223
+ # now consider item limiting arguments
224
+ if self.partial_analyze_ok:
225
+ if self.max_items_to_analyze is not None:
226
+ return {item.uid: item for item in list(can_analyze.values())[0:self.max_items_to_analyze]}
227
+ return can_analyze
228
+
229
+ if len(cannot_analyze) > 0:
230
+ raise self.ItemsNotReady('There are %d items that cannot be analyzed and partial_analyze_ok is off.' %
231
+ len(cannot_analyze))
232
+
233
+ return can_analyze
234
+
235
+ def add_analyzer(self, analyzer: IAnalyzer) -> NoReturn:
236
+ """
237
+ Add another analyzer to use on the items to be analyzed.
238
+
239
+ Args:
240
+ analyzer: An analyzer object (:class:`~idmtools.entities.ianalyzer.IAnalyzer`).
241
+
242
+ Returns:
243
+ None
244
+ """
245
+ self.analyzers.append(analyzer)
246
+
247
+ def _update_analyzer_uids(self) -> NoReturn:
248
+ """
249
+ Ensure that each analyzer has a unique ID in this context by updating them as needed.
250
+
251
+ Returns:
252
+ None
253
+ """
254
+ unique_uids = {analyzer.uid for analyzer in self.analyzers}
255
+ if len(unique_uids) < len(self.analyzers):
256
+ for i in range(len(self.analyzers)):
257
+ self.analyzers[i].uid += f'-{i}'
258
+ logger.debug(f'Analyzer {i.__class__} id set to {self.analyzers[i].uid}')
259
+
260
+ def _initialize_analyzers(self) -> NoReturn:
261
+ """
262
+ Do the steps needed to prepare analyzers for item analysis.
263
+
264
+ Returns:
265
+ None
266
+ """
267
+ logger.debug("Initializing Analyzers")
268
+ # Setup the working directory and call initialize() on each analyzer
269
+ for analyzer in self.analyzers:
270
+ if self.force_wd:
271
+ analyzer.working_dir = self.working_dir
272
+ else:
273
+ analyzer.working_dir = analyzer.working_dir or self.working_dir
274
+
275
+ if logger.isEnabledFor(DEBUG):
276
+ logger.debug(f"Analyzer working directory set to {analyzer.working_dir}")
277
+ analyzer.initialize()
278
+
279
+ # make sure each analyzer in self.analyzers has a unique uid
280
+ self._update_analyzer_uids()
281
+
282
+ def _print_configuration(self, n_items: int, n_processes: int) -> NoReturn:
283
+ """
284
+ Display some information about an ongoing analysis.
285
+
286
+ Args:
287
+ n_items: The number of items being analyzed.
288
+ n_processes: The number of active item processing handlers.
289
+
290
+ Returns:
291
+ None
292
+ """
293
+ n_ignored_items = len(self.potential_items) - n_items
294
+ user_logger.log(VERBOSE, 'Analyze Manager')
295
+ user_logger.log(VERBOSE, f' | {n_items} item(s) selected for analysis')
296
+ user_logger.log(VERBOSE, f' | partial_analyze_ok is {self.partial_analyze_ok}, max_items is '
297
+ f'{self.max_items_to_analyze}, and {n_ignored_items} item(s) are being ignored')
298
+ user_logger.log(VERBOSE, ' | Analyzer(s): ')
299
+ for analyzer in self.analyzers:
300
+ user_logger.log(VERBOSE, f' | - {analyzer.uid} File parsing: {on_off(analyzer.parse)} / Use '
301
+ f'cache: {on_off(hasattr(analyzer, "cache"))}')
302
+ if hasattr(analyzer, 'need_dir_map'):
303
+ user_logger.log(VERBOSE, f' | (Directory map: {on_off(analyzer.need_dir_map)}')
304
+ user_logger.log(VERBOSE, f' | Pool of {n_processes} analyzing {self.executor_type}(es)')
305
+
306
+ def _run_and_wait_for_mapping(self, executor) -> Tuple[Dict, bool]:
307
+ """
308
+ Run and manage the mapping call on each item.
309
+
310
+ Args:
311
+ executor: A pool of workers.
312
+
313
+ Returns:
314
+ False if an exception occurred processing **.map** on any item; otherwise True (succeeded).
315
+
316
+ """
317
+ # add items to process (map)
318
+ n_items = len(self._items)
319
+ logger.debug(f"Number of items for analysis: {n_items}")
320
+ logger.debug("Mapping the items for analysis")
321
+ futures = dict()
322
+ results = dict()
323
+ status = True
324
+ # create status bar and then queue our futures
325
+ with tqdm(total=len(self._items)) as progress:
326
+ for i in self._items.values():
327
+ future = executor.submit(map_item, i)
328
+ future.add_done_callback(lambda p: progress.update())
329
+ futures[future] = i
330
+
331
+ # wait on our futures to complete, catch exceptions, and aggregate results
332
+ for future in as_completed(futures.keys()):
333
+ if future.exception():
334
+ status = False
335
+ ex = future.exception()
336
+ user_logger.error(ex)
337
+ if not self.continue_on_error:
338
+ raise ex
339
+ else:
340
+ results[futures[future]] = future.result()
341
+
342
+ logger.debug(f"Result fetching status: : {status}")
343
+ return results, status
344
+
345
+ def _run_and_wait_for_reducing(self, executor, results) -> dict:
346
+ """
347
+ Run and manage the reduce call on the combined item results (by analyzer).
348
+
349
+ Args:
350
+ executor: A pool of workers.
351
+
352
+ Returns:
353
+ An analyzer ID keyed dictionary of finalize results.
354
+
355
+ """
356
+ # the keys in self.cache from map() calls are expected to be item ids. Each keyed value
357
+ # contains analyzer_id: item_results_for_analyzer entries.
358
+ logger.debug("Running reduce results")
359
+ futures = {}
360
+ finalize_results = {}
361
+ # create a progress bar
362
+ with tqdm(total=len(self.analyzers), desc="Running Analyzer Reduces") as progress:
363
+ # for each analyzer, queue our futures
364
+ for analyzer in self.analyzers:
365
+ logger.debug(f"Gather data for {analyzer.uid}")
366
+ item_data_for_analyzer = {}
367
+ for item, data in results.items():
368
+ if analyzer.uid in data:
369
+ item_data_for_analyzer[item] = data[analyzer.uid]
370
+ if item_data_for_analyzer.__len__() == 0:
371
+ user_logger.warning(f"Note: {analyzer.uid} has no simulation data to analyze. Please verify the filter or map function of the analyzer.")
372
+ future = executor.submit(analyzer.reduce, item_data_for_analyzer)
373
+ future.add_done_callback(lambda p: progress.update())
374
+
375
+ logger.debug(f"Queueing {analyzer.uid}")
376
+ futures[future] = analyzer.uid
377
+
378
+ # wait on our futures, catch exceptions, and aggregate results
379
+ logger.debug("Waiting for results")
380
+ for future in as_completed(futures.keys()):
381
+ if future.exception():
382
+ user_logger.error(f'Reduce for Analyzer {futures[future]} failed')
383
+ user_logger.exception(future.exception())
384
+ user_logger.error("See log for details")
385
+ if not self.continue_on_error:
386
+ sys.exit(-1)
387
+ else:
388
+ finalize_results[futures[future]] = future.result()
389
+ if logger.isEnabledFor(DEBUG):
390
+ logger.debug("Finished reducing results")
391
+ for future in futures.keys():
392
+ future.cancel()
393
+ return finalize_results
394
+
395
+ def analyze(self) -> bool:
396
+ """
397
+ Process the provided items with the provided analyzers. This is the main driver method of :class:`AnalyzeManager`.
398
+
399
+ Args:
400
+ kwargs: extra parameters
401
+
402
+ Returns:
403
+ True on success; False on failure/exception.
404
+ """
405
+ start_time = time.time()
406
+
407
+ # If no analyzers or simulations have been provided, there is nothing to do
408
+
409
+ if len(self.analyzers) == 0:
410
+ user_logger.error('No analyzers were provided; cannot run analysis.')
411
+ return False
412
+ self._initialize_analyzers()
413
+
414
+ if len(self.potential_items) == 0:
415
+ user_logger.error('No items were provided; cannot run analysis.')
416
+ return False
417
+ # trim processing to those items that are ready and match requested limits
418
+ self._items: Dict[str, IEntity] = self._get_items_to_analyze()
419
+
420
+ if len(self._items) == 0:
421
+ user_logger.error('No items are ready; cannot run analysis.')
422
+ return False
423
+
424
+ # initialize mapping results cache/storage
425
+ n_items = len(self._items)
426
+ n_processes = min(self.max_processes, max(n_items, 1))
427
+
428
+ logger.info(f'Analyzing {n_items}')
429
+
430
+ # do any platform-specific initializations
431
+ logger.debug("Triggering per group functions")
432
+ for analyzer in self.analyzers:
433
+ analyzer.per_group(items=self._items)
434
+
435
+ if self.verbose:
436
+ self._print_configuration(n_items, n_processes)
437
+
438
+ no_print_config_exists = False
439
+ # Before we initialize processes, ensure no warning about config are set
440
+ if 'IDMTOOLS_NO_PRINT_CONFIG_USED' not in os.environ:
441
+ os.environ['IDMTOOLS_NO_PRINT_CONFIG_USED'] = "1"
442
+ os.environ['IDMTOOLS_HIDE_DEV_WARNING'] = "1"
443
+ os.environ['IDMTOOLS_NO_CONFIG_WARNING'] = "1"
444
+ else:
445
+ no_print_config_exists = True
446
+
447
+ # create worker pool
448
+ try:
449
+ # To ensure subprocesses reuse same config file, pass it through environment vars
450
+ config_file = IdmConfigParser().get_config_path()
451
+ if config_file:
452
+ os.environ['IDMTOOLS_CONFIG_FILE'] = config_file
453
+
454
+ # our options for our executor
455
+ opts = dict(max_workers=n_processes, initializer=pool_worker_initializer, initargs=(map_item, self.analyzers, self.platform))
456
+ # determine type. Most cases we want a process, but sometimes(like in Jupyter notebooks, we want to use threads)
457
+ if self.executor_type == 'process':
458
+ executor = ProcessPoolExecutor(**opts)
459
+ else:
460
+ executor = ThreadPoolExecutor(**opts)
461
+
462
+ map_results, status = self._run_and_wait_for_mapping(executor)
463
+ finalize_results = self._run_and_wait_for_reducing(executor, map_results)
464
+
465
+ finally:
466
+ # because of debug mode, we have to leave executor and let python handle the shutdown through del
467
+ # see https://youtrack.jetbrains.com/issue/PY-34432
468
+ os.environ['NO_LOGGING_INIT'] = 'n'
469
+ logger.debug("Shutting down workers")
470
+
471
+ for analyzer in self.analyzers:
472
+ analyzer.results = finalize_results[analyzer.uid]
473
+
474
+ logger.debug("Destroying analyzers")
475
+ for analyzer in self.analyzers:
476
+ analyzer.destroy()
477
+
478
+ if not no_print_config_exists:
479
+ del os.environ['IDMTOOLS_NO_PRINT_CONFIG_USED']
480
+ del os.environ['IDMTOOLS_HIDE_DEV_WARNING']
481
+ del os.environ['IDMTOOLS_NO_CONFIG_WARNING']
482
+ if 'IDMTOOLS_CONFIG_FILE' in os.environ:
483
+ del os.environ['IDMTOOLS_CONFIG_FILE']
484
+
485
+ if self.verbose:
486
+ total_time = time.time() - start_time
487
+ time_str = verbose_timedelta(total_time)
488
+ user_logger.log(SUCCESS, '\r | Analysis complete. Took {} '
489
+ '(~ {:.3f} per item)'.format(time_str, total_time / n_items))
490
+ return True