idmtools-platform-comps 0.0.0.dev0__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. idmtools_platform_comps/__init__.py +25 -8
  2. idmtools_platform_comps/cli/__init__.py +4 -0
  3. idmtools_platform_comps/cli/cli_functions.py +50 -0
  4. idmtools_platform_comps/cli/comps.py +492 -0
  5. idmtools_platform_comps/comps_cli.py +48 -0
  6. idmtools_platform_comps/comps_operations/__init__.py +6 -0
  7. idmtools_platform_comps/comps_operations/asset_collection_operations.py +263 -0
  8. idmtools_platform_comps/comps_operations/experiment_operations.py +569 -0
  9. idmtools_platform_comps/comps_operations/simulation_operations.py +678 -0
  10. idmtools_platform_comps/comps_operations/suite_operations.py +228 -0
  11. idmtools_platform_comps/comps_operations/workflow_item_operations.py +269 -0
  12. idmtools_platform_comps/comps_platform.py +309 -0
  13. idmtools_platform_comps/plugin_info.py +168 -0
  14. idmtools_platform_comps/ssmt_operations/__init__.py +6 -0
  15. idmtools_platform_comps/ssmt_operations/simulation_operations.py +77 -0
  16. idmtools_platform_comps/ssmt_operations/workflow_item_operations.py +73 -0
  17. idmtools_platform_comps/ssmt_platform.py +44 -0
  18. idmtools_platform_comps/ssmt_work_items/__init__.py +4 -0
  19. idmtools_platform_comps/ssmt_work_items/comps_work_order_task.py +29 -0
  20. idmtools_platform_comps/ssmt_work_items/comps_workitems.py +113 -0
  21. idmtools_platform_comps/ssmt_work_items/icomps_workflowitem.py +71 -0
  22. idmtools_platform_comps/ssmt_work_items/work_order.py +54 -0
  23. idmtools_platform_comps/utils/__init__.py +4 -0
  24. idmtools_platform_comps/utils/assetize_output/__init__.py +4 -0
  25. idmtools_platform_comps/utils/assetize_output/assetize_output.py +125 -0
  26. idmtools_platform_comps/utils/assetize_output/assetize_ssmt_script.py +144 -0
  27. idmtools_platform_comps/utils/base_singularity_work_order.json +6 -0
  28. idmtools_platform_comps/utils/download/__init__.py +4 -0
  29. idmtools_platform_comps/utils/download/download.py +178 -0
  30. idmtools_platform_comps/utils/download/download_ssmt.py +81 -0
  31. idmtools_platform_comps/utils/download_experiment.py +116 -0
  32. idmtools_platform_comps/utils/file_filter_workitem.py +519 -0
  33. idmtools_platform_comps/utils/general.py +358 -0
  34. idmtools_platform_comps/utils/linux_mounts.py +73 -0
  35. idmtools_platform_comps/utils/lookups.py +123 -0
  36. idmtools_platform_comps/utils/package_version.py +489 -0
  37. idmtools_platform_comps/utils/python_requirements_ac/__init__.py +4 -0
  38. idmtools_platform_comps/utils/python_requirements_ac/create_asset_collection.py +155 -0
  39. idmtools_platform_comps/utils/python_requirements_ac/install_requirements.py +109 -0
  40. idmtools_platform_comps/utils/python_requirements_ac/requirements_to_asset_collection.py +374 -0
  41. idmtools_platform_comps/utils/python_version.py +40 -0
  42. idmtools_platform_comps/utils/scheduling.py +154 -0
  43. idmtools_platform_comps/utils/singularity_build.py +491 -0
  44. idmtools_platform_comps/utils/spatial_output.py +76 -0
  45. idmtools_platform_comps/utils/ssmt_utils/__init__.py +6 -0
  46. idmtools_platform_comps/utils/ssmt_utils/common.py +70 -0
  47. idmtools_platform_comps/utils/ssmt_utils/file_filter.py +568 -0
  48. idmtools_platform_comps/utils/sweeping.py +162 -0
  49. idmtools_platform_comps-0.0.2.dist-info/METADATA +100 -0
  50. idmtools_platform_comps-0.0.2.dist-info/RECORD +62 -0
  51. idmtools_platform_comps-0.0.2.dist-info/entry_points.txt +9 -0
  52. idmtools_platform_comps-0.0.2.dist-info/licenses/LICENSE.TXT +3 -0
  53. {idmtools_platform_comps-0.0.0.dev0.dist-info → idmtools_platform_comps-0.0.2.dist-info}/top_level.txt +1 -0
  54. ssmt_image/Dockerfile +52 -0
  55. ssmt_image/Makefile +21 -0
  56. ssmt_image/__init__.py +6 -0
  57. ssmt_image/bootstrap.sh +30 -0
  58. ssmt_image/build_docker_image.py +161 -0
  59. ssmt_image/pip.conf +3 -0
  60. ssmt_image/push_docker_image.py +49 -0
  61. ssmt_image/requirements.txt +9 -0
  62. idmtools_platform_comps-0.0.0.dev0.dist-info/METADATA +0 -41
  63. idmtools_platform_comps-0.0.0.dev0.dist-info/RECORD +0 -5
  64. {idmtools_platform_comps-0.0.0.dev0.dist-info → idmtools_platform_comps-0.0.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,568 @@
1
+ """idmtools ssmt file filter tools.
2
+
3
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
4
+ """
5
+ import argparse
6
+ import glob
7
+ import json
8
+ import os
9
+ import uuid
10
+ from collections import defaultdict
11
+ from concurrent.futures._base import as_completed, Future
12
+ from concurrent.futures.thread import ThreadPoolExecutor
13
+ from logging import DEBUG, getLogger
14
+ from pathlib import PurePath
15
+ from typing import List, Tuple, Set, Callable
16
+ import humanfriendly
17
+ from COMPS.Data import WorkItem, Experiment, Simulation, AssetCollectionFile, AssetCollection, QueryCriteria, CommissionableEntity
18
+ from COMPS.Data.Simulation import SimulationState
19
+ from COMPS.Data.WorkItem import RelationType
20
+ from tabulate import tabulate
21
+ from tqdm import tqdm
22
+ from idmtools_platform_comps.utils.file_filter_workitem import FilenameFormatFunction
23
+
24
+ try:
25
+ from common import setup_verbose
26
+ except (FileNotFoundError, ImportError):
27
+ from idmtools_platform_comps.utils.ssmt_utils.common import setup_verbose
28
+
29
# Module-level logger for diagnostic/debug output
logger = getLogger(__name__)
# Logger whose output is surfaced directly to the end user
user_logger = getLogger('user')
# Our Asset Tuple we use to gather data on files
# The format is Source Filename, Destination Filename, Checksum, and then Filesize
AssetTuple = Tuple[str, str, uuid.UUID, int]
SetOfAssets = Set[AssetTuple]
# Store the Done State
DONE_STATE = [SimulationState.Failed, SimulationState.Canceled, SimulationState.Succeeded]
# Query that loads tags and hpc_jobs children so simulation working directories can be resolved
HPC_JOBS_QUERY = QueryCriteria().select_children(['tags', 'hpc_jobs']).orderby('date_created desc')

# Define our function that can be used as callbacks for filtering entities
EntityFilterFunc = Callable[[CommissionableEntity.CommissionableEntity], bool]
41
+
42
+
43
def get_common_parser(app_description):
    """
    Create the common argument parser used with the file filter functions.

    Args:
        app_description: Description displayed in the parser help output

    Returns:
        argparse.ArgumentParser pre-loaded with the shared filtering options
    """
    # Pass the text as description=; the first positional argument of ArgumentParser
    # is `prog` (the program name), so the old call replaced argv[0] in help output
    # instead of showing a description.
    parser = argparse.ArgumentParser(description=app_description)
    parser.add_argument("--file-pattern", nargs="+", help="File Pattern to Filter")
    parser.add_argument("--exclude-pattern", default=None, nargs="+", help="Exclude File Pattern to Filter")
    parser.add_argument(
        "--simulation-prefix-format-str", default="{simulation.id}",
        help="Format for prefix of outputs from simulations. Defaults to the simulation id. When setting, you have access to the full simulation object. "
             "If you are filtering an experiment, you have also have access to experiment object"
    )
    parser.add_argument("--no-simulation-prefix", default=False, action='store_true', help="No simulation prefix. Be careful because this could cause file collisions")
    parser.add_argument("--work-item-prefix-format-str", default=None, help="Format for prefix of workitem outputs. Defaults to None. Useful when combining outputs of multiple work-items")
    parser.add_argument("--assets", default=False, action='store_true', help="Include Assets")
    parser.add_argument("--verbose", default=False, action="store_true", help="Verbose logging")
    parser.add_argument("--pre-run-func", default=None, action='append', help="List of function to run before starting analysis. Useful to load packages up in docker container before run")
    parser.add_argument("--entity-filter-func", default=None, help="Name of function that can be used to filter items")
    parser.add_argument("--filename-format-func", default=None, help="Name of function that can be used to format filenames")
    parser.add_argument("--dry-run", default=False, action="store_true", help="Find files, but don't add")
    return parser
62
+
63
+
64
def gather_files(directory: str, file_patterns: List[str], exclude_patterns: List[str] = None, assets: bool = False, prefix: str = None, filename_format_func: FilenameFormatFunction = None) -> SetOfAssets:
    """
    Gather file_list.

    Args:
        directory: Directory to gather from
        file_patterns: List of file patterns
        exclude_patterns: List of patterns to exclude. None means no exclusions.
        assets: Should files under the Assets directory be included
        prefix: Prefix for file_list destination names
        filename_format_func: Function that can format the filename

    Returns:
        Return files that match patterns as tuples of (source, destination, md5 checksum, size).
    """
    from idmtools.utils.hashing import calculate_md5
    file_list = set()
    # Loop through our patterns
    for pattern in file_patterns:
        # Use glob to search each directory using the pattern. We also do full recursion here
        sd = os.path.join(directory, pattern)
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Looking for files with pattern {sd}')
        for file in glob.iglob(sd, recursive=True):
            # Ensure it is a file and not a directory
            if logger.isEnabledFor(DEBUG):
                logger.debug(f'{file.encode("ascii", "ignore").decode("utf-8")} matching pattern. Is Dir: {os.path.isdir(file)}. Is Link: {os.path.islink(file)}')
            if os.path.isfile(file):
                if logger.isEnabledFor(DEBUG):
                    logger.debug(f'Found file {file.encode("ascii", "ignore").decode("utf-8")}')
                # Create our shortname. This will remove the base directory from the file. Eg
                # If are scanning C:\ABC\, the file C:\ABC\DEF\123.txt will be DEF\123.txt
                short_name = file.replace(directory + os.path.sep, "")
                # Setup destination name which is just joining prefix if it exists
                dest_name = os.path.join(prefix if prefix else '', short_name)
                filesize = os.stat(file).st_size
                if filename_format_func:
                    dest_name = filename_format_func(dest_name)
                # Files under "Assets" are experiment-level assets; include them only when
                # the assets flag is set. Everything else is always included.
                if assets or not short_name.startswith("Assets"):
                    file_list.add((file, dest_name, uuid.UUID(calculate_md5(file)), filesize))

    # Now strip file that match exclude patterns. We do this after since the regular expressions here are a bit more expensive, so we are at the
    # minimum possible files we must scan at this point. We did possibly calculate extra md5s here
    if logger.isEnabledFor(DEBUG):
        logger.debug(f"File count before excluding: {len(file_list)} in {directory}")
    # Guard against exclude_patterns being None (its default)
    result = {f for f in file_list if not is_file_excluded(f[0], exclude_patterns or [])}
    if logger.isEnabledFor(DEBUG):
        # Fixed: this previously logged len(file_list) (the pre-exclusion count) again
        logger.debug(f"File count after excluding: {len(result)} in {directory}")
    return result
118
+
119
+
120
def is_file_excluded(filename: str, exclude_patterns: List[str]) -> bool:
    """
    Is file excluded by excluded patterns.

    Args:
        filename: File to filter
        exclude_patterns: List of file patterns to exclude. None or an empty list excludes nothing.

    Returns:
        True if file is excluded
    """
    # Treat None the same as "no exclusions" so callers don't need to guard the default
    if not exclude_patterns:
        return False
    # Match case-insensitively using pathlib's glob-style matching
    path = PurePath(filename.lower())
    return any(path.match(pattern.lower()) for pattern in exclude_patterns)
135
+
136
+
137
def gather_files_from_related(work_item: WorkItem, file_patterns: List[str], exclude_patterns: List[str], assets: bool, simulation_prefix_format_str: str, work_item_prefix_format_str: str, entity_filter_func: EntityFilterFunc,
                              filename_format_func: FilenameFormatFunction) -> SetOfAssets:  # pragma: no cover
    """
    Gather files from different related entities.

    Args:
        work_item: Work item to gather from
        file_patterns: List of File Patterns
        exclude_patterns: List of Exclude patterns
        assets: Should items be gathered from Assets Directory
        simulation_prefix_format_str: Format string for prefix of Simulations
        work_item_prefix_format_str: Format string for prefix of WorkItem
        entity_filter_func: Function to filter entities
        filename_format_func: Filename filter function

    Returns:
        Set of File Tuples in format Filename, Destination Name, and Checksum
    """
    file_list = set()
    # Setup threading work using a future list and a ThreadPoolExecutor.
    # The context manager guarantees worker threads are shut down even when a
    # filter call raises (previously the pool was never shut down).
    futures = []
    with ThreadPoolExecutor() as pool:
        if logger.isEnabledFor(DEBUG):
            logger.debug("Filtering experiments")
        filter_experiments(assets, entity_filter_func, exclude_patterns, file_patterns, futures, pool, simulation_prefix_format_str, work_item, filename_format_func=filename_format_func)
        if logger.isEnabledFor(DEBUG):
            logger.debug("Filtering simulations")
        filter_simulations_files(assets, entity_filter_func, exclude_patterns, file_patterns, futures, pool, simulation_prefix_format_str, work_item, filename_format_func=filename_format_func)
        if logger.isEnabledFor(DEBUG):
            logger.debug("Filtering workitems")
        filter_work_items_files(assets, entity_filter_func, exclude_patterns, file_patterns, futures, pool, work_item, work_item_prefix_format_str, filename_format_func=filename_format_func)

        if logger.isEnabledFor(DEBUG):
            logger.debug("Waiting on filtering to complete")
        # Now wait until scanning items has completed
        for future in tqdm(as_completed(futures), total=len(futures), desc="Filtering relations for files"):
            file_list.update(future.result())

    if logger.isEnabledFor(DEBUG):
        logger.debug(f"Total Files found: {len(file_list)}")
    return file_list
178
+
179
+
180
def filter_experiments(assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, file_patterns: List[str], futures: List[Future], pool: ThreadPoolExecutor, simulation_prefix_format_str: str, work_item: WorkItem,
                       filename_format_func: FilenameFormatFunction):  # pragma: no cover
    """
    Queue file-matching jobs for every related experiment's simulations.

    Args:
        assets: Assets to filter
        entity_filter_func: Function used to filter entities
        exclude_patterns_compiles: List of patterns to exclude
        file_patterns: File patterns to match
        futures: Future queue
        pool: Pool to execute jobs on
        simulation_prefix_format_str: Format string for prefix of Simulations
        work_item: Parent WorkItem
        filename_format_func: Function to filter filenames

    Returns:
        None
    """
    # Experiments are the most involved relation, so they are handled first
    related_experiments: List[Experiment] = work_item.get_related_experiments()
    for experiment in related_experiments:
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Running filter on {experiment.name}/{experiment.id}')
        # Skip experiments rejected by the user-supplied entity filter
        if not entity_filter_func(experiment):
            continue
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Loading simulations for filter on {experiment.name}/{experiment.id}')
        # Fetch simulations with the hpc_jobs criteria so working directories can be resolved
        simulations = experiment.get_simulations(HPC_JOBS_QUERY)
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Total simulations to evaluate {len(simulations)}")
        if simulations:
            # Experiment-level assets are harvested via the first usable simulation
            filter_experiment_assets(work_item, assets, entity_filter_func, exclude_patterns_compiles, experiment, file_patterns, futures, pool, simulation_prefix_format_str, simulations, filename_format_func=filename_format_func)
            # Queue each simulation for file matching
            filter_simulation_list(assets, entity_filter_func, exclude_patterns_compiles, file_patterns, futures, pool, simulation_prefix_format_str, simulations, work_item, experiment=experiment, filename_format_func=filename_format_func)
215
+
216
+
217
def get_simulation_prefix(parent_work_item: WorkItem, simulation: Simulation, simulation_prefix_format_str: str, experiment: Experiment = None) -> str:
    """
    Build the output prefix for a simulation from the user-supplied format string.

    Args:
        parent_work_item: Parent workitem, available to the format string
        simulation: Simulation the prefix is being built for
        simulation_prefix_format_str: Prefix format string
        experiment: Optional experiment made available to the format string

    Returns:
        Prefix for the simulation, or None when no format string was supplied
    """
    if simulation_prefix_format_str:
        prefix = simulation_prefix_format_str.format(simulation=simulation, experiment=experiment, parent_workitem=parent_work_item)
    else:
        # No format string means no per-simulation prefix
        prefix = None
    if logger.isEnabledFor(DEBUG):
        logger.debug(f'Simulation Prefix: {prefix}')
    return prefix
234
+
235
+
236
def filter_experiment_assets(
        work_item: WorkItem, assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, experiment: Experiment, file_patterns: List[str],
        futures: List[Future], pool: ThreadPoolExecutor, simulation_prefix_format_str: str, simulations: List[Simulation], filename_format_func: FilenameFormatFunction):  # pragma: no cover
    """
    Filter experiment assets. This method uses the first simulation to gather experiment assets.

    Args:
        work_item: Parent Workitem
        assets: Whether assets should be matched
        entity_filter_func: Entity Filter Function
        exclude_patterns_compiles: List of files to exclude
        experiment: Experiment
        file_patterns: File patterns to filter
        futures: List of futures
        pool: Pool to submit search jobs to
        simulation_prefix_format_str: Format string for simulation
        simulations: List of simulations
        filename_format_func: Name function for filename

    Returns:
        None
    """
    # If we should gather assets, use the first simulation. It means we will duplicate some work, but the set will filter out duplicates
    if not assets:
        return
    # Find the first simulation that passes the entity filter.
    # The bounds check must run BEFORE indexing: the original order raised an
    # IndexError whenever every simulation was rejected by the filter.
    i = 0
    while i < len(simulations) and not entity_filter_func(simulations[i]):
        i += 1
    if i < len(simulations):
        simulation = simulations[i]
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Loading assets for {experiment.name} from simulation {simulation.id}')
        # create prefix from the format var
        prefix = get_simulation_prefix(work_item, simulation, simulation_prefix_format_str, experiment)
        futures.append(pool.submit(gather_files, directory=simulation.hpc_jobs[0].working_directory, file_patterns=file_patterns, exclude_patterns=exclude_patterns_compiles, assets=assets, prefix=prefix, filename_format_func=filename_format_func))
271
+
272
+
273
def filter_simulations_files(assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, file_patterns: List[str], futures: List[Future], pool: ThreadPoolExecutor, simulation_prefix_format_str: str, work_item: WorkItem,
                             filename_format_func: FilenameFormatFunction):  # pragma: no cover
    """
    Filter files from simulations directly related to the work item.

    Args:
        assets: Whether assets should be matched
        entity_filter_func: Entity Filter Function
        exclude_patterns_compiles: List of files to exclude
        file_patterns: File patterns to filter
        futures: List of futures
        pool: Pool to submit search jobs to
        simulation_prefix_format_str: Format string for simulation
        work_item: Work item whose directly related simulations are scanned
        filename_format_func: Filename function

    Returns:
        None
    """
    # Only simulations the user related directly to the work item are handled here;
    # simulations belonging to related experiments are covered by filter_experiments.
    related_simulations: List[Simulation] = work_item.get_related_simulations()
    filter_simulation_list(assets, entity_filter_func, exclude_patterns_compiles, file_patterns, futures, pool, simulation_prefix_format_str, related_simulations, work_item, filename_format_func=filename_format_func)
295
+
296
+
297
def filter_simulation_list(assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, file_patterns: List[str], futures: List[Future], pool: ThreadPoolExecutor, simulation_prefix_format_str: str, simulations: List[Simulation], work_item: WorkItem,
                           experiment: Experiment = None, filename_format_func: FilenameFormatFunction = None):  # pragma: no cover
    """
    Queue a file-matching job for every simulation that passes the entity filter.

    Used both for simulations belonging to experiments and directly related simulations.

    Args:
        assets: Whether assets should be matched
        entity_filter_func: Entity Filter Function
        exclude_patterns_compiles: List of files to exclude
        file_patterns: File patterns to filter
        futures: List of futures
        pool: Pool to submit search jobs to
        simulation_prefix_format_str: Format string for simulation
        simulations: List of simulations
        work_item: Parent workitem
        experiment: Optional experiment.
        filename_format_func: Filename function

    Returns:
        None
    """
    for simulation in simulations:
        # Skip simulations rejected by the user-supplied filter
        if not entity_filter_func(simulation):
            continue
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Loading outputs from Simulation {simulation.id} with status of {simulation.state}')
        prefix = get_simulation_prefix(parent_work_item=work_item, experiment=experiment, simulation=simulation, simulation_prefix_format_str=simulation_prefix_format_str)
        # Reload the simulation when hpc_jobs was not fetched so its working directory is available
        if simulation.hpc_jobs is None:
            simulation = simulation.get(simulation.id, HPC_JOBS_QUERY)
        futures.append(pool.submit(gather_files, directory=simulation.hpc_jobs[0].working_directory, file_patterns=file_patterns, exclude_patterns=exclude_patterns_compiles, assets=assets, prefix=prefix, filename_format_func=filename_format_func))
326
+
327
+
328
def filter_work_items_files(assets: bool, entity_filter_func: EntityFilterFunc, exclude_patterns_compiles: List, file_patterns: List[str], futures: List[Future], pool: ThreadPoolExecutor, work_item: WorkItem, work_item_prefix_format_str: str,
                            filename_format_func: FilenameFormatFunction):  # pragma: no cover
    """
    Queue file-matching jobs for work items related to the parent work item.

    Args:
        assets: Whether assets should be matched
        entity_filter_func: Entity Filter Function
        exclude_patterns_compiles: List of files to exclude
        file_patterns: File patterns to filter
        futures: List of futures
        pool: Pool to submit search jobs to
        work_item: WorkItem
        work_item_prefix_format_str: WorkItemPrefix
        filename_format_func: Filename function

    Returns:
        None
    """
    # Walk every related work item, queueing a scan for each one the filter accepts
    related_items: List[WorkItem] = work_item.get_related_work_items()
    for related_work_item in related_items:
        if not entity_filter_func(related_work_item):
            continue
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Loading outputs from WorkItem {related_work_item.name} - {related_work_item.id}')
        # The prefix format string may reference both the related item and the parent
        if work_item_prefix_format_str:
            prefix = work_item_prefix_format_str.format(work_item=related_work_item, parent_work_item=work_item)
        else:
            prefix = None
        futures.append(pool.submit(gather_files, directory=related_work_item.working_directory, file_patterns=file_patterns, exclude_patterns=exclude_patterns_compiles, assets=assets, prefix=prefix, filename_format_func=filename_format_func))
355
+
356
+
357
def filter_ac_files(wi: WorkItem, patterns, exclude_patterns) -> List[AssetCollectionFile]:  # pragma: no cover
    """
    Filter the files of asset collections the work item depends on.

    Args:
        wi: WorkItem
        patterns: File patterns
        exclude_patterns: Exclude patterns

    Returns:
        List of asset collection files that match the patterns and are not excluded
    """
    if logger.isEnabledFor(DEBUG):
        logger.debug('Filtering asset collections')
    related_collections: List[AssetCollection] = wi.get_related_asset_collections(relation_type=RelationType.DependsOn)
    matched = set()
    for collection in related_collections:
        # Refresh the collection with its asset children loaded
        collection = collection.get(collection.id, QueryCriteria().select_children("assets"))
        for asset in collection.assets:
            asset_path = get_asset_file_path(asset)
            # Keep the asset as soon as any pattern matches
            if any(PurePath(asset_path).match(pattern) for pattern in patterns):
                matched.add(asset)

    return [a for a in matched if not is_file_excluded(get_asset_file_path(a), exclude_patterns)]
384
+
385
+
386
def get_asset_file_path(file):
    """
    Build the path of an asset file, prepending the relative path when one is set.

    Otherwise only the filename is returned.

    Args:
        file: Asset collection file with ``relative_path`` and ``file_name`` attributes

    Returns:
        Relative path joined with the filename, or just the filename
    """
    if file.relative_path:
        return os.path.join(file.relative_path, file.file_name)
    return file.file_name
399
+
400
+
401
class DuplicateAsset(Exception):
    """Error raised when two or more outputs map to the same destination path."""
    # Relative documentation link surfaced to users alongside this error for troubleshooting
    doc_link: str = "platforms/comps/assetize_output.html#errors"
404
+
405
+
406
def ensure_no_duplicates(ac_files, files):  # pragma: no cover
    """
    Ensure no two outputs share a destination path.

    Args:
        ac_files: Asset collection files
        files: Simulation/Experiment/Workitem file tuples (source, destination, checksum, size)

    Returns:
        None

    Raises:
        DuplicateAsset - if asset with same output path is found
    """
    # Tally how many times each destination path occurs across both sources
    dest_counts = defaultdict(int)
    for ac_file in ac_files:
        path = os.path.join(ac_file.relative_path, ac_file.file_name) if ac_file.relative_path else ac_file.file_name
        dest_counts[path] += 1
    for entry in files:
        dest_counts[entry[1]] += 1
    # Any destination seen more than once is a collision
    duplicates = {path for path, count in dest_counts.items() if count > 1}
    if not duplicates:
        return
    # Build a human-readable report attributing each colliding path to its source
    collision_report = []
    for ac_file in ac_files:
        path = os.path.join(ac_file.relative_path, ac_file.file_name) if ac_file.relative_path else ac_file.file_name
        if path in duplicates:
            collision_report.append(f'{path} from Asset Collections')
    for entry in files:
        if entry[1] in duplicates:
            collision_report.append(f'{entry[1]}<{entry[0]}> from Experiment, Simulation, or WorkItem')
    nl = "\n"
    raise DuplicateAsset(f"The following assets have duplicate destination paths:{nl} {nl.join(sorted(collision_report))}")
440
+
441
+
442
def print_results(ac_files, files):  # pragma: no cover
    """
    Print the total size and write the file_list.json / file_list.html reports.

    Args:
        ac_files: Asset collection files
        files: File tuples in the form (source, destination, checksum, size)

    Returns:
        None
    """
    all_files = []
    for file in files:
        all_files.append(dict(filename=file[0], destname=file[1], filesize=file[3]))
    total_file_size = sum(f[3] for f in files)
    for af in ac_files:
        fn = get_asset_file_path(af)
        # NOTE(review): _length is a private COMPS attribute; no public size accessor appears available
        all_files.append(dict(filename=fn, destname=fn, filesize=af._length))
    # Nothing matched: skip the reports entirely. Previously all_files[0].keys()
    # raised an IndexError on an empty result set.
    if not all_files:
        return
    with open("file_list.json", 'w') as flist:
        json.dump(all_files, flist, indent=4, sort_keys=True)
    header = all_files[0].keys()
    rows = [x.values() for x in sorted(all_files, key=lambda x: x['destname'])]
    with open("file_list.html", "w") as html_list:
        html_list.write(tabulate(rows, header, tablefmt='html'))

    print(f'Total asset collection size: {humanfriendly.format_size(total_file_size)}')
468
+
469
+
470
def apply_custom_filters(args: argparse.Namespace):
    """
    Apply user defined custom filter functions.

    The function does the following workflow.

    1. Check if there is a pre_run_func(s) defined.
       1b) If there are pre-run funcs, run each of those
    2. Is there an entity_filter_func. This function allows us to filter items(Experiment/Simulations/etc) directly. If not defined, we use a default function that returns true.
    3. If filename format function is defined, we set that, otherwise we use the default which just uses the original file name

    Args:
        args: argparse namespace.

    Returns:
        entity_filter_func and filename format func
    """
    if args.pre_run_func:
        import pre_run
        for pre_run_func in args.pre_run_func:
            if logger.isEnabledFor(DEBUG):
                logger.debug(f"Calling PreRunFunc: {pre_run_func}")
            # Look up each named function individually. This previously passed the whole
            # args.pre_run_func list to getattr, which raised a TypeError for any pre-run func.
            getattr(pre_run, pre_run_func)()
    # set a default filter function that returns true if none are set
    if args.entity_filter_func:
        import entity_filter_func
        entity_filter_func = getattr(entity_filter_func, args.entity_filter_func)
    else:
        if logger.isEnabledFor(DEBUG):
            logger.debug("Setting default filter function")

        def default_filter_func(x):
            return True

        entity_filter_func = default_filter_func

    if args.filename_format_func:
        import filename_format_func
        fn_format_func = getattr(filename_format_func, args.filename_format_func)
    else:
        # Default: leave filenames unchanged
        def default_format_func(s: str):
            return s

        fn_format_func = default_format_func

    return entity_filter_func, fn_format_func
516
+
517
+
518
def parse_filter_args_common(args: argparse.Namespace):
    """
    Parse filter arguments from an argparse namespace.

    We need this because we use filtering across multiple scripts.

    Args:
        args: Argparse args

    Returns:
        entity_filter_func and filename format func
    """
    if args.verbose:
        setup_verbose(args)
    # A bare "**" anywhere in the pattern list means "match everything"; collapse the list
    if "**" in args.file_pattern:
        args.file_pattern = ["**"]
    entity_filter_func, fn_format_func = apply_custom_filters(args)
    # --exclude-pattern defaults to None; normalize so the loop below cannot raise a TypeError
    if args.exclude_pattern is None:
        args.exclude_pattern = []
    # Undo shell escaping of asterisks in the patterns
    for i, a in enumerate(args.exclude_pattern):
        args.exclude_pattern[i] = a.replace("\\*", "*")
    for i, a in enumerate(args.file_pattern):
        if a.startswith("'") and a.endswith("'"):
            args.file_pattern[i] = a.replace("\\*", "*")
    # Strip stray quotes that shells can leave around the prefix format strings
    for i in ['simulation_prefix_format_str', 'work_item_prefix_format_str']:
        si = getattr(args, i)
        if si and si.startswith("'") and si.endswith("'"):
            si = si.strip("'")
        setattr(args, i, si)
    return entity_filter_func, fn_format_func
546
+
547
+
548
def filter_files_and_assets(args: argparse.Namespace, entity_filter_func: EntityFilterFunc, wi: WorkItem, filename_format_func: FilenameFormatFunction) -> Tuple[SetOfAssets, List[AssetCollectionFile]]:
    """
    Filter files and assets using provided parameters.

    Args:
        args: Argparse details
        entity_filter_func: Optional filter function for entities. This function is ran on every item. If it returns true, we return the item
        wi: WorkItem we are running in
        filename_format_func: Filename format function allows us to customize how we filter filenames for output.

    Returns:
        Files that match the filter and the assets that match the filter as well.
    """
    # Normalize once: --exclude-pattern defaults to None, and both scans below expect a list.
    # Previously only the gather_files_from_related call was guarded, so filter_ac_files
    # could crash inside is_file_excluded when no exclude pattern was supplied.
    exclude_patterns = args.exclude_pattern if args.exclude_pattern else []
    files = gather_files_from_related(
        wi, file_patterns=args.file_pattern, exclude_patterns=exclude_patterns, assets=args.assets,
        work_item_prefix_format_str=args.work_item_prefix_format_str,
        simulation_prefix_format_str=args.simulation_prefix_format_str if not args.no_simulation_prefix else None,
        entity_filter_func=entity_filter_func, filename_format_func=filename_format_func
    )
    files_from_ac: List[AssetCollectionFile] = filter_ac_files(wi, args.file_pattern, exclude_patterns)
    return files, files_from_ac