idmtools-platform-comps 0.0.0.dev0__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- idmtools_platform_comps/__init__.py +25 -8
- idmtools_platform_comps/cli/__init__.py +4 -0
- idmtools_platform_comps/cli/cli_functions.py +50 -0
- idmtools_platform_comps/cli/comps.py +492 -0
- idmtools_platform_comps/comps_cli.py +48 -0
- idmtools_platform_comps/comps_operations/__init__.py +6 -0
- idmtools_platform_comps/comps_operations/asset_collection_operations.py +263 -0
- idmtools_platform_comps/comps_operations/experiment_operations.py +569 -0
- idmtools_platform_comps/comps_operations/simulation_operations.py +678 -0
- idmtools_platform_comps/comps_operations/suite_operations.py +228 -0
- idmtools_platform_comps/comps_operations/workflow_item_operations.py +269 -0
- idmtools_platform_comps/comps_platform.py +309 -0
- idmtools_platform_comps/plugin_info.py +168 -0
- idmtools_platform_comps/ssmt_operations/__init__.py +6 -0
- idmtools_platform_comps/ssmt_operations/simulation_operations.py +77 -0
- idmtools_platform_comps/ssmt_operations/workflow_item_operations.py +73 -0
- idmtools_platform_comps/ssmt_platform.py +44 -0
- idmtools_platform_comps/ssmt_work_items/__init__.py +4 -0
- idmtools_platform_comps/ssmt_work_items/comps_work_order_task.py +29 -0
- idmtools_platform_comps/ssmt_work_items/comps_workitems.py +113 -0
- idmtools_platform_comps/ssmt_work_items/icomps_workflowitem.py +71 -0
- idmtools_platform_comps/ssmt_work_items/work_order.py +54 -0
- idmtools_platform_comps/utils/__init__.py +4 -0
- idmtools_platform_comps/utils/assetize_output/__init__.py +4 -0
- idmtools_platform_comps/utils/assetize_output/assetize_output.py +125 -0
- idmtools_platform_comps/utils/assetize_output/assetize_ssmt_script.py +144 -0
- idmtools_platform_comps/utils/base_singularity_work_order.json +6 -0
- idmtools_platform_comps/utils/download/__init__.py +4 -0
- idmtools_platform_comps/utils/download/download.py +178 -0
- idmtools_platform_comps/utils/download/download_ssmt.py +81 -0
- idmtools_platform_comps/utils/download_experiment.py +116 -0
- idmtools_platform_comps/utils/file_filter_workitem.py +519 -0
- idmtools_platform_comps/utils/general.py +358 -0
- idmtools_platform_comps/utils/linux_mounts.py +73 -0
- idmtools_platform_comps/utils/lookups.py +123 -0
- idmtools_platform_comps/utils/package_version.py +489 -0
- idmtools_platform_comps/utils/python_requirements_ac/__init__.py +4 -0
- idmtools_platform_comps/utils/python_requirements_ac/create_asset_collection.py +155 -0
- idmtools_platform_comps/utils/python_requirements_ac/install_requirements.py +109 -0
- idmtools_platform_comps/utils/python_requirements_ac/requirements_to_asset_collection.py +374 -0
- idmtools_platform_comps/utils/python_version.py +40 -0
- idmtools_platform_comps/utils/scheduling.py +154 -0
- idmtools_platform_comps/utils/singularity_build.py +491 -0
- idmtools_platform_comps/utils/spatial_output.py +76 -0
- idmtools_platform_comps/utils/ssmt_utils/__init__.py +6 -0
- idmtools_platform_comps/utils/ssmt_utils/common.py +70 -0
- idmtools_platform_comps/utils/ssmt_utils/file_filter.py +568 -0
- idmtools_platform_comps/utils/sweeping.py +162 -0
- idmtools_platform_comps-0.0.2.dist-info/METADATA +100 -0
- idmtools_platform_comps-0.0.2.dist-info/RECORD +62 -0
- idmtools_platform_comps-0.0.2.dist-info/entry_points.txt +9 -0
- idmtools_platform_comps-0.0.2.dist-info/licenses/LICENSE.TXT +3 -0
- {idmtools_platform_comps-0.0.0.dev0.dist-info → idmtools_platform_comps-0.0.2.dist-info}/top_level.txt +1 -0
- ssmt_image/Dockerfile +52 -0
- ssmt_image/Makefile +21 -0
- ssmt_image/__init__.py +6 -0
- ssmt_image/bootstrap.sh +30 -0
- ssmt_image/build_docker_image.py +161 -0
- ssmt_image/pip.conf +3 -0
- ssmt_image/push_docker_image.py +49 -0
- ssmt_image/requirements.txt +9 -0
- idmtools_platform_comps-0.0.0.dev0.dist-info/METADATA +0 -41
- idmtools_platform_comps-0.0.0.dev0.dist-info/RECORD +0 -5
- {idmtools_platform_comps-0.0.0.dev0.dist-info → idmtools_platform_comps-0.0.2.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
"""idmtools ssmt file filter tools.
|
|
2
|
+
|
|
3
|
+
Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
|
|
4
|
+
"""
|
|
5
|
+
import argparse
|
|
6
|
+
import glob
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import uuid
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
from concurrent.futures._base import as_completed, Future
|
|
12
|
+
from concurrent.futures.thread import ThreadPoolExecutor
|
|
13
|
+
from logging import DEBUG, getLogger
|
|
14
|
+
from pathlib import PurePath
|
|
15
|
+
from typing import List, Tuple, Set, Callable
|
|
16
|
+
import humanfriendly
|
|
17
|
+
from COMPS.Data import WorkItem, Experiment, Simulation, AssetCollectionFile, AssetCollection, QueryCriteria, CommissionableEntity
|
|
18
|
+
from COMPS.Data.Simulation import SimulationState
|
|
19
|
+
from COMPS.Data.WorkItem import RelationType
|
|
20
|
+
from tabulate import tabulate
|
|
21
|
+
from tqdm import tqdm
|
|
22
|
+
from idmtools_platform_comps.utils.file_filter_workitem import FilenameFormatFunction
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from common import setup_verbose
|
|
26
|
+
except (FileNotFoundError, ImportError):
|
|
27
|
+
from idmtools_platform_comps.utils.ssmt_utils.common import setup_verbose
|
|
28
|
+
|
|
29
|
+
logger = getLogger(__name__)
|
|
30
|
+
user_logger = getLogger('user')
|
|
31
|
+
# Our Asset Tuple we use to gather data on files
|
|
32
|
+
# The format is Source Filename, Destination Filename, Checksum, and then Filesize
|
|
33
|
+
AssetTuple = Tuple[str, str, uuid.UUID, int]
|
|
34
|
+
SetOfAssets = Set[AssetTuple]
|
|
35
|
+
# Store the Done State
|
|
36
|
+
DONE_STATE = [SimulationState.Failed, SimulationState.Canceled, SimulationState.Succeeded]
|
|
37
|
+
HPC_JOBS_QUERY = QueryCriteria().select_children(['tags', 'hpc_jobs']).orderby('date_created desc')
|
|
38
|
+
|
|
39
|
+
# Define our function that can be used as callbacks for filtering entities
|
|
40
|
+
EntityFilterFunc = Callable[[CommissionableEntity.CommissionableEntity], bool]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_common_parser(app_description):
    """
    Build the argument parser shared by the file-filter scripts.

    Args:
        app_description: Description shown in the parser's help output

    Returns:
        Configured :class:`argparse.ArgumentParser`
    """
    common_parser = argparse.ArgumentParser(app_description)
    # Inclusion/exclusion patterns
    common_parser.add_argument("--file-pattern", nargs="+", help="File Pattern to Filter")
    common_parser.add_argument("--exclude-pattern", default=None, nargs="+", help="Exclude File Pattern to Filter")
    # Output-prefix controls
    common_parser.add_argument(
        "--simulation-prefix-format-str", default="{simulation.id}",
        help="Format for prefix of outputs from simulations. Defaults to the simulation id. When setting, you have access to the full simulation object. "
             "If you are filtering an experiment, you have also have access to experiment object"
    )
    common_parser.add_argument("--no-simulation-prefix", default=False, action='store_true', help="No simulation prefix. Be careful because this could cause file collisions")
    common_parser.add_argument("--work-item-prefix-format-str", default=None, help="Format for prefix of workitem outputs. Defaults to None. Useful when combining outputs of multiple work-items")
    # Misc toggles and user hook functions
    common_parser.add_argument("--assets", default=False, action='store_true', help="Include Assets")
    common_parser.add_argument("--verbose", default=False, action="store_true", help="Verbose logging")
    common_parser.add_argument("--pre-run-func", default=None, action='append', help="List of function to run before starting analysis. Useful to load packages up in docker container before run")
    common_parser.add_argument("--entity-filter-func", default=None, help="Name of function that can be used to filter items")
    common_parser.add_argument("--filename-format-func", default=None, help="Name of function that can be used to format filenames")
    common_parser.add_argument("--dry-run", default=False, action="store_true", help="Find files, but don't add")
    return common_parser
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def gather_files(directory: str, file_patterns: List[str], exclude_patterns: List[str] = None, assets: bool = False, prefix: str = None, filename_format_func: FilenameFormatFunction = None) -> SetOfAssets:
    """
    Gather file_list.

    Args:
        directory: Directory to gather from
        file_patterns: List of file patterns
        exclude_patterns: List of patterns to exclude
        assets: Should assets be included
        prefix: Prefix for file_list
        filename_format_func: Function that can format the filename

    Returns:
        Return files that match patterns.
    """
    from idmtools.utils.hashing import calculate_md5
    file_list = set()
    # Loop through our patterns
    for pattern in file_patterns:
        # Use glob to search each directory using the pattern. We also do full recursion here
        sd = os.path.join(directory, pattern)
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Looking for files with pattern {sd}')
        for file in glob.iglob(sd, recursive=True):
            # Ensure it is a file and not a directory
            if logger.isEnabledFor(DEBUG):
                logger.debug(f'{file.encode("ascii", "ignore").decode("utf-8")} matching pattern. Is Dir: {os.path.isdir(file)}. Is Link: {os.path.islink(file)}')
            if os.path.isfile(file):
                if logger.isEnabledFor(DEBUG):
                    logger.debug(f'Found file {file.encode("ascii", "ignore").decode("utf-8")}')
                # Create our shortname. This will remove the base directory from the file. Eg
                # If are scanning C:\ABC\, the file C:\ABC\DEF\123.txt will be DEF\123.txt
                short_name = file.replace(directory + os.path.sep, "")
                # Setup destination name which is just joining prefix if it exists
                dest_name = os.path.join(prefix if prefix else '', short_name)
                filesize = os.stat(file).st_size
                if filename_format_func:
                    dest_name = filename_format_func(dest_name)
                # Files under "Assets" are only included when the assets flag is set;
                # all other files are always included. (The original duplicated the
                # add() call in both branches — collapsed into one condition.)
                if assets or not short_name.startswith("Assets"):
                    file_list.add((file, dest_name, uuid.UUID(calculate_md5(file)), filesize))

    # Now strip file that match exclude patterns. We do this after since the regular expressions here are a bit more expensive, so a we are at the
    # minimum possible files we must scan as this point. We did possible calculate extra md5s here
    if logger.isEnabledFor(DEBUG):
        logger.debug(f"File count before excluding: {len(file_list)} in {directory}")
    result = {f for f in file_list if not is_file_excluded(f[0], exclude_patterns)}
    if logger.isEnabledFor(DEBUG):
        # Fix: report the post-exclusion count (was len(file_list), i.e. the pre-exclusion count)
        logger.debug(f"File count after excluding: {len(result)} in {directory}")
    return result
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def is_file_excluded(filename: str, exclude_patterns: List[str]) -> bool:
    """
    Is file excluded by excluded patterns.

    Matching is case-insensitive: both the filename and each pattern are
    lower-cased before being compared with a PurePath glob match.

    Args:
        filename: File to filter
        exclude_patterns: List of file patterns to exclude

    Returns:
        True if the file is excluded
    """
    lowered_path = PurePath(filename.lower())
    return any(lowered_path.match(pattern.lower()) for pattern in exclude_patterns)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def gather_files_from_related(work_item: WorkItem, file_patterns: List[str], exclude_patterns: List[str], assets: bool, simulation_prefix_format_str: str, work_item_prefix_format_str: str, entity_filter_func: EntityFilterFunc,
                              filename_format_func: FilenameFormatFunction) -> SetOfAssets:  # pragma: no cover
    """
    Gather files from different related entities.

    Args:
        work_item: Work item to gather from
        file_patterns: List of File Patterns
        exclude_patterns: List of Exclude patterns
        assets: Should items be gathered from Assets Directory
        simulation_prefix_format_str: Format string for prefix of Simulations
        work_item_prefix_format_str: Format string for prefix of WorkItem
        entity_filter_func: Function to filter entities
        filename_format_func: Filename filter function

    Returns:
        Set of File Tuples in format Filename, Destination Name, and Checksum
    """
    file_list = set()
    # Setup threading work using a future list and a ThreadPoolExecutor.
    # Fix: use a context manager so worker threads are always shut down
    # (the original pool was never shut down explicitly).
    futures = []
    with ThreadPoolExecutor() as pool:
        if logger.isEnabledFor(DEBUG):
            logger.debug("Filtering experiments")
        filter_experiments(assets, entity_filter_func, exclude_patterns, file_patterns, futures, pool, simulation_prefix_format_str, work_item, filename_format_func=filename_format_func)
        if logger.isEnabledFor(DEBUG):
            logger.debug("Filtering simulations")
        filter_simulations_files(assets, entity_filter_func, exclude_patterns, file_patterns, futures, pool, simulation_prefix_format_str, work_item, filename_format_func=filename_format_func)
        if logger.isEnabledFor(DEBUG):
            logger.debug("Filtering workitems")
        filter_work_items_files(assets, entity_filter_func, exclude_patterns, file_patterns, futures, pool, work_item, work_item_prefix_format_str, filename_format_func=filename_format_func)

        if logger.isEnabledFor(DEBUG):
            logger.debug("Waiting on filtering to complete")
        # Now wait until scanning items has completed
        for future in tqdm(as_completed(futures), total=len(futures), desc="Filtering relations for files"):
            file_list.update(future.result())

    if logger.isEnabledFor(DEBUG):
        logger.debug(f"Total Files found: {len(file_list)}")
    return file_list
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def filter_experiments(assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, file_patterns: List[str], futures: List[Future], pool: ThreadPoolExecutor, simulation_prefix_format_str: str, work_item: WorkItem,
                       filename_format_func: FilenameFormatFunction):  # pragma: no cover
    """
    Filter Experiments outputs using our patterns.

    Args:
        assets: Assets to filter
        entity_filter_func: Function to filter entities
        exclude_patterns_compiles: List of patterns to exclude
        file_patterns: File patterns to match
        futures: Future queue
        pool: Pool to execute jobs on
        simulation_prefix_format_str: Format string for prefix of Simulations
        work_item: Parent WorkItem
        filename_format_func: Function to filter filenames

    Returns:
        None
    """
    # Start with experiments since they are the most complex portion
    experiments: List[Experiment] = work_item.get_related_experiments()
    for experiment in experiments:
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Running filter on {experiment.name}/{experiment.id}')
        # Skip experiments the user-supplied filter rejects
        if not entity_filter_func(experiment):
            continue
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Loading simulations for filter on {experiment.name}/{experiment.id}')
        # Fetch simulations with the hpc_jobs criteria. This allows us to lookup the directory
        simulations = experiment.get_simulations(HPC_JOBS_QUERY)
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Total simulations to evaluate {len(simulations)}")
        if not simulations:
            continue
        # Gather experiment-level assets (done via the first usable simulation)
        filter_experiment_assets(work_item, assets, entity_filter_func, exclude_patterns_compiles, experiment, file_patterns, futures, pool, simulation_prefix_format_str, simulations, filename_format_func=filename_format_func)
        # Loop through each simulation and queue it up for file matching
        filter_simulation_list(assets, entity_filter_func, exclude_patterns_compiles, file_patterns, futures, pool, simulation_prefix_format_str, simulations, work_item, experiment=experiment, filename_format_func=filename_format_func)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_simulation_prefix(parent_work_item: WorkItem, simulation: Simulation, simulation_prefix_format_str: str, experiment: Experiment = None) -> str:
    """
    Get Simulation Prefix.

    Args:
        parent_work_item: Parent workitem
        simulation: Simulation to form
        simulation_prefix_format_str: Prefix format string
        experiment: Optional experiment made available to the format string

    Returns:
        Prefix for the simulation's outputs, or None when no format string is set
    """
    if simulation_prefix_format_str:
        # The format string can reference the simulation, experiment and parent workitem
        prefix = simulation_prefix_format_str.format(simulation=simulation, experiment=experiment, parent_workitem=parent_work_item)
    else:
        prefix = None
    if logger.isEnabledFor(DEBUG):
        logger.debug(f'Simulation Prefix: {prefix}')
    return prefix
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def filter_experiment_assets(
        work_item: WorkItem, assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, experiment: Experiment, file_patterns: List[str],
        futures: List[Future], pool: ThreadPoolExecutor, simulation_prefix_format_str: str, simulations: List[Simulation], filename_format_func: FilenameFormatFunction):  # pragma: no cover
    """
    Filter experiment assets. This method uses the first simulation to gather experiment assets.

    Args:
        work_item: Parent Workitem
        assets: Whether assets should be matched
        entity_filter_func: Entity Filter Function
        exclude_patterns_compiles: List of files to exclude
        experiment: Experiment
        file_patterns: File patterns to filter
        futures: List of futures
        pool: Pool to submit search jobs to
        simulation_prefix_format_str: Format string for simulation
        simulations: List of simulations
        filename_format_func: Name function for filename

    Returns:
        None
    """
    # If we should gather assets, use the first simulation. It means we will duplicate some work, but the set will filter out duplicated
    if assets:
        # find the first simulation we can use to gather assets from.
        # Fix: check the bounds BEFORE indexing — the original evaluated
        # simulations[i] first, raising IndexError when every simulation
        # was rejected by entity_filter_func.
        i = 0
        while i < len(simulations) and not entity_filter_func(simulations[i]):
            i += 1
        if i < len(simulations):
            simulation = simulations[i]
            if logger.isEnabledFor(DEBUG):
                logger.debug(f'Loading assets for {experiment.name} from simulation {simulation.id}')
            # create prefix from the format var
            prefix = get_simulation_prefix(work_item, simulation, simulation_prefix_format_str, experiment)
            futures.append(pool.submit(gather_files, directory=simulation.hpc_jobs[0].working_directory, file_patterns=file_patterns, exclude_patterns=exclude_patterns_compiles, assets=assets, prefix=prefix, filename_format_func=filename_format_func))
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def filter_simulations_files(assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, file_patterns: List[str], futures: List[Future], pool: ThreadPoolExecutor, simulation_prefix_format_str: str, work_item: WorkItem,
                             filename_format_func: FilenameFormatFunction):  # pragma: no cover
    """
    Filter Simulations files.

    Args:
        assets: Whether assets should be matched
        entity_filter_func: Entity Filter Function
        exclude_patterns_compiles: List of files to exclude
        file_patterns: File patterns to filter
        futures: List of futures
        pool: Pool to submit search jobs to
        simulation_prefix_format_str: Format string for simulation
        work_item: Parent work item
        filename_format_func: Filename function

    Returns:
        None
    """
    # Simulations the user related to the work item directly (not via an experiment)
    directly_related: List[Simulation] = work_item.get_related_simulations()
    filter_simulation_list(assets, entity_filter_func, exclude_patterns_compiles, file_patterns, futures, pool, simulation_prefix_format_str, directly_related, work_item, filename_format_func=filename_format_func)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def filter_simulation_list(assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, file_patterns: List[str], futures: List[Future], pool: ThreadPoolExecutor, simulation_prefix_format_str: str, simulations: List[Simulation], work_item: WorkItem,
                           experiment: Experiment = None, filename_format_func: FilenameFormatFunction = None):  # pragma: no cover
    """
    Filter simulations list. This method is used for experiments and simulations.

    Args:
        assets: Whether assets should be matched
        entity_filter_func: Entity Filter Function
        exclude_patterns_compiles: List of files to exclude
        file_patterns: File patterns to filter
        futures: List of futures
        pool: Pool to submit search jobs to
        simulation_prefix_format_str: Format string for simulation
        simulations: List of simulations
        work_item: Parent workitem
        experiment: Optional experiment.
        filename_format_func: Filename function

    Returns:
        None
    """
    for simulation in simulations:
        # Skip simulations rejected by the user-supplied filter
        if not entity_filter_func(simulation):
            continue
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Loading outputs from Simulation {simulation.id} with status of {simulation.state}')
        prefix = get_simulation_prefix(parent_work_item=work_item, experiment=experiment, simulation=simulation, simulation_prefix_format_str=simulation_prefix_format_str)
        if simulation.hpc_jobs is None:
            # Re-fetch with the hpc_jobs children so the working directory is available
            simulation = simulation.get(simulation.id, HPC_JOBS_QUERY)
        futures.append(pool.submit(gather_files, directory=simulation.hpc_jobs[0].working_directory, file_patterns=file_patterns, exclude_patterns=exclude_patterns_compiles, assets=assets, prefix=prefix, filename_format_func=filename_format_func))
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def filter_work_items_files(assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, file_patterns: List[str], futures: List[Future], pool: ThreadPoolExecutor, work_item: WorkItem, work_item_prefix_format_str: str,
                            filename_format_func: FilenameFormatFunction):  # pragma: no cover
    """
    Filter work items files.

    Args:
        assets: Whether assets should be matched
        entity_filter_func: Entity Filter Function
        exclude_patterns_compiles: List of files to exclude
        file_patterns: File patterns to filter
        futures: List of futures
        pool: Pool to submit search jobs to
        work_item: WorkItem
        work_item_prefix_format_str: WorkItemPrefix
        filename_format_func: Filename function

    Returns:
        None
    """
    # Loop through the work items related to this one
    work_items: List[WorkItem] = work_item.get_related_work_items()
    for related_work_item in work_items:
        # Skip items rejected by the user-supplied filter
        if not entity_filter_func(related_work_item):
            continue
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Loading outputs from WorkItem {related_work_item.name} - {related_work_item.id}')
        if work_item_prefix_format_str:
            prefix = work_item_prefix_format_str.format(work_item=related_work_item, parent_work_item=work_item)
        else:
            prefix = None
        futures.append(pool.submit(gather_files, directory=related_work_item.working_directory, file_patterns=file_patterns, exclude_patterns=exclude_patterns_compiles, assets=assets, prefix=prefix, filename_format_func=filename_format_func))
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def filter_ac_files(wi: WorkItem, patterns, exclude_patterns) -> List[AssetCollectionFile]:  # pragma: no cover
    """
    Filter Asset Collection File.

    Args:
        wi: WorkItem
        patterns: File patterns
        exclude_patterns: Exclude patterns

    Returns:
        List of filtered asset collection files
    """
    if logger.isEnabledFor(DEBUG):
        logger.debug('Filtering asset collections')
    # Only asset collections the work item depends on are considered
    depends_on: List[AssetCollection] = wi.get_related_asset_collections(relation_type=RelationType.DependsOn)
    matched = set()
    for collection in depends_on:
        # Re-fetch with the assets children populated
        collection = collection.get(collection.id, QueryCriteria().select_children("assets"))
        for asset_file in collection.assets:
            asset_path = get_asset_file_path(asset_file)
            # A file is kept when any pattern matches its path
            if any(PurePath(asset_path).match(pattern) for pattern in patterns):
                matched.add(asset_file)

    # Apply exclusions last, once per matched file
    return [f for f in matched if not is_file_excluded(get_asset_file_path(f), exclude_patterns)]
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def get_asset_file_path(file):
    """
    Get asset file path, combining the relative path and filename when a relative path is set.

    Otherwise just the filename is returned.

    Args:
        file: Asset collection file

    Returns:
        Path of the file within the collection
    """
    if file.relative_path:
        return os.path.join(file.relative_path, file.file_name)
    return file.file_name
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
class DuplicateAsset(Exception):
    """Raised when two or more assets resolve to the same destination path."""

    # Relative documentation link surfaced with the error to help users resolve it
    doc_link: str = "platforms/comps/assetize_output.html#errors"
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def ensure_no_duplicates(ac_files, files):  # pragma: no cover
    """
    Ensure no duplicates are in asset.

    Args:
        ac_files: Ac files
        files: Simulation/Experiment/Workitem files

    Returns:
        None

    Raises:
        DuplicateAsset - if asset with same output path is found
    """
    # Count how many sources target each destination path
    destination_counts = defaultdict(int)
    for ac_file in ac_files:
        name = os.path.join(ac_file.relative_path, ac_file.file_name) if ac_file.relative_path else ac_file.file_name
        destination_counts[name] += 1
    for entry in files:
        destination_counts[entry[1]] += 1

    # Every destination should have exactly one source; anything higher is a collision
    duplicates = {name for name, seen in destination_counts.items() if seen > 1}
    if not duplicates:
        return

    # Build a readable report attributing each colliding path to its origin
    error_files = []
    for ac_file in ac_files:
        name = os.path.join(ac_file.relative_path, ac_file.file_name) if ac_file.relative_path else ac_file.file_name
        if name in duplicates:
            error_files.append(f'{name} from Asset Collections')
    for entry in files:
        if entry[1] in duplicates:
            error_files.append(f'{entry[1]}<{entry[0]}> from Experiment, Simulation, or WorkItem')
    nl = "\n"
    raise DuplicateAsset(f"The following assets have duplicate destination paths:{nl} {nl.join(sorted(error_files))}")
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def print_results(ac_files, files):  # pragma: no cover
    """
    Print Results.

    Writes the combined file list to "file_list.json" and "file_list.html" in the
    current working directory, then prints the total size of the matched files.

    Args:
        ac_files: Ac Files
        files: Files

    Returns:
        None
    """
    all_files = []
    # Gathered files are tuples of (source path, destination path, checksum, size)
    for file in files:
        all_files.append(dict(filename=file[0], destname=file[1], filesize=file[3]))
    # NOTE(review): the reported total only sums the gathered files, not the
    # asset-collection files added below — confirm that is intended
    total_file_size = sum([f[3] for f in files])
    # Asset collection files use their collection-relative path as both source and destination
    for af in ac_files:
        fn = get_asset_file_path(af)
        # NOTE(review): _length is a private COMPS attribute — presumably the
        # file size in bytes; confirm against the COMPS client
        all_files.append(dict(filename=fn, destname=fn, filesize=af._length))
    with open("file_list.json", 'w') as flist:
        json.dump(all_files, flist, indent=4, sort_keys=True)
    # NOTE(review): raises IndexError when nothing matched (all_files empty) —
    # confirm callers never reach this with an empty result
    header = all_files[0].keys()
    rows = [x.values() for x in sorted(all_files, key=lambda x: x['destname'])]
    with open("file_list.html", "w") as html_list:
        html_list.write(tabulate(rows, header, tablefmt='html'))

    print(f'Total asset collection size: {humanfriendly.format_size(total_file_size)}')
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def apply_custom_filters(args: argparse.Namespace):
    """
    Apply user defined custom filter functions.

    The function does the following workflow.

    1. Check if there is a pre_run_func(s) defined.
    1b) If there are pre-run funcs, run each of those
    2) Is there an entity_filter_func. This function allows us to filter items(Experiment/Simulations/etc) directly. If not defined, we use a default function returns true.
    3) If filename format function is defined, we set that, otherwise we use the default which just uses the original file name

    Args:
        args: argparse namespace.

    Returns:
        entity_filter_func and filename format func
    """
    if args.pre_run_func:
        import pre_run
        for pre_run_func in args.pre_run_func:
            if logger.isEnabledFor(DEBUG):
                logger.debug(f"Calling PreRunFunc: {pre_run_func}")
            # Fix: look up each named function from the loop variable. The
            # original passed args.pre_run_func (the whole list) to getattr,
            # which raises TypeError on any use of --pre-run-func.
            getattr(pre_run, pre_run_func)()
    # set a default filter function that returns true if none are set
    if args.entity_filter_func:
        import entity_filter_func
        entity_filter_func = getattr(entity_filter_func, args.entity_filter_func)
    else:
        if logger.isEnabledFor(DEBUG):
            logger.debug("Setting default filter function")

        def default_filter_func(x):
            return True

        entity_filter_func = default_filter_func

    if args.filename_format_func:
        import filename_format_func
        fn_format_func = getattr(filename_format_func, args.filename_format_func)
    else:
        # Default: leave destination filenames unchanged
        def default_format_func(s: str):
            return s

        fn_format_func = default_format_func

    return entity_filter_func, fn_format_func
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def parse_filter_args_common(args: argparse.Namespace):
    """
    Parse filter arguments from an argparse namespace.

    We need this because we use filtering across multiple scripts.

    Args:
        args: Argparse args

    Returns:
        entity_filter_func and filename format func
    """
    if args.verbose:
        setup_verbose(args)
    # A bare "**" means match everything; collapse to a single pattern
    if "**" in args.file_pattern:
        args.file_pattern = ["**"]
    entity_filter_func, fn_format_func = apply_custom_filters(args)
    # Fix: --exclude-pattern defaults to None; guard before iterating
    # (the original crashed with TypeError when no exclude patterns were given)
    for i, a in enumerate(args.exclude_pattern or []):
        args.exclude_pattern[i] = a.replace("\\*", "*")
    for i, a in enumerate(args.file_pattern):
        # NOTE(review): surrounding quotes are detected but not stripped here —
        # only escaped globs are unescaped; confirm that is intended
        if a.startswith("'") and a.endswith("'"):
            args.file_pattern[i] = a.replace("\\*", "*")
    # Strip shell-style single quotes from the prefix format strings
    for i in ['simulation_prefix_format_str', 'work_item_prefix_format_str']:
        si = getattr(args, i)
        if si and si.startswith("'") and si.endswith("'"):
            si = si.strip("'")
        setattr(args, i, si)
    return entity_filter_func, fn_format_func
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def filter_files_and_assets(args: argparse.Namespace, entity_filter_func: EntityFilterFunc, wi: WorkItem, filename_format_func: FilenameFormatFunction) -> Tuple[SetOfAssets, List[AssetCollectionFile]]:
    """
    Filter files and assets using provided parameters.

    Args:
        args: Argparse details
        entity_filter_func: Optional filter function for entities. This function is ran on every item. If it returns true, we return the item
        wi: WorkItem we are running in
        filename_format_func: Filename format function allows use to customize how we filter filenames for output.

    Returns:
        Files that matches the filter and the assets that matches the filter as well.
    """
    # Normalize once: --exclude-pattern defaults to None. The original only
    # guarded the gather_files_from_related call, so filter_ac_files could be
    # handed None and crash when iterating exclude patterns.
    exclude_patterns = args.exclude_pattern if args.exclude_pattern else []
    files = gather_files_from_related(
        wi, file_patterns=args.file_pattern, exclude_patterns=exclude_patterns, assets=args.assets,
        work_item_prefix_format_str=args.work_item_prefix_format_str,
        simulation_prefix_format_str=args.simulation_prefix_format_str if not args.no_simulation_prefix else None,
        entity_filter_func=entity_filter_func, filename_format_func=filename_format_func
    )
    files_from_ac: List[AssetCollectionFile] = filter_ac_files(wi, args.file_pattern, exclude_patterns)
    return files, files_from_ac
|