psr-factory 5.0.0b21__py3-none-win_amd64.whl → 5.0.0b69__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
psr/cloud/cloud.py DELETED
@@ -1,1444 +0,0 @@
1
- # PSR Cloud. Copyright (C) PSR, Inc - All Rights Reserved
2
- # Unauthorized copying of this file, via any medium is strictly prohibited
3
- # Proprietary and confidential
4
-
5
- import copy
6
- import functools
7
- import gzip
8
- import hashlib
9
- import logging
10
- import os
11
- import re
12
- import shutil
13
- import subprocess
14
- import sys
15
- import warnings
16
- import xml.etree.ElementTree as ET
17
- from datetime import datetime, timedelta
18
- from pathlib import Path
19
- from time import sleep, time
20
- from typing import List, Optional, Union
21
-
22
- import pefile
23
- import zeep
24
- from filelock import FileLock
25
-
26
- from .aws import download_case_from_s3, upload_case_to_s3
27
- from .data import Case, CloudError, CloudInputError
28
- from .desktop import import_case
29
- from .log import enable_log_timestamp, get_logger
30
- from .status import FAULTY_TERMINATION_STATUS, STATUS_MAP_TEXT, ExecutionStatus
31
- from .tempfile import CreateTempFile
32
- from .version import __version__
33
- from .xml import create_case_xml
34
-
35
- INTERFACE_VERSION = "PyCloud " + __version__ + ", binding for " + sys.version
36
-
37
-
38
def thread_safe(lock_file: Optional[str] = None):
    """
    Decorator factory that serializes calls to a function using filelock.

    :param lock_file: Path to the lock file. If None, "pycloud.lock" is used
        (the value that was previously hard-coded).
    :return: A decorator; the decorated function runs while the file lock is held.
    """
    lock_path = "pycloud.lock" if lock_file is None else lock_file

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # The lock is acquired on every call and released when the call
            # returns or raises.
            with FileLock(lock_path):
                return func(*args, **kwargs)

        return wrapper

    return decorator
53
-
54
-
55
def _md5sum(value: str, enconding=True) -> str:
    """
    Return the hexadecimal MD5 digest of ``value``.

    :param value: Text to hash, or binary data when ``enconding`` is false.
    :param enconding: When true, ``value`` is encoded as UTF-8 before hashing;
        when false it is handed to hashlib unchanged.
    :return: Lower-case hexadecimal digest.
    """
    payload = value.encode("utf-8") if enconding else value
    return hashlib.md5(payload).hexdigest()  # nosec
61
-
62
-
63
def hash_password(password: str) -> str:
    """Return the upper-case hexadecimal MD5 digest of the UTF-8 encoded password."""
    digest = hashlib.md5(password.encode("utf-8")).hexdigest()  # nosec
    return digest.upper()
65
-
66
-
67
def _check_for_errors(
    xml: ET.ElementTree, logger: Optional[logging.Logger] = None
) -> None:
    """
    Raise CloudError when the XML carries a ``Parametro`` element named "erro".

    :param xml: Parsed XML to inspect.
    :param logger: Optional logger; the error text is logged to it before raising.
    :raises CloudError: With the text of the error element, if one is present.
    """
    error_node = xml.find("./Parametro[@nome='erro']")
    if error_node is None:
        return

    message = error_node.text
    if logger is not None:
        logger.error(message)
    raise CloudError(message)
75
-
76
-
77
def _hide_password(params: str) -> str:
    """
    Mask the content of every ``<Parametro nome="senha" ...>`` element.

    :param params: XML text that may contain a password parameter.
    :return: The same text with the element content replaced by eight asterisks.
    """
    return re.sub(
        r'(<Parametro nome="senha"[^>]*>)(.*?)(</Parametro>)',
        lambda match: match.group(1) + "********" + match.group(3),
        params,
    )
81
-
82
-
83
def _xml_to_str(xml_content: ET.ElementTree) -> str:
    """
    Serialize an XML tree to text with the password parameter masked.

    :param xml_content: Parsed XML tree.
    :return: UTF-8 decoded XML text in which the content of the
        ``<Parametro nome="senha" ...>`` element, if present, is masked.
    """
    root = xml_content.getroot()
    serialized = ET.tostring(root, encoding="utf-8", method="xml")
    return _hide_password(serialized.decode())
89
-
90
-
91
def _handle_relative_path(path: str) -> str:
    """
    Return ``path`` unchanged when it is absolute, otherwise its absolute form.

    Relative paths are resolved with ``os.path.abspath``, i.e. against the
    current working directory.
    """
    return path if os.path.isabs(path) else os.path.abspath(path)
95
-
96
-
97
# Default PSR Cloud installation directory and the console executable below it.
_PSRCLOUD_PATH = r"C:\PSR\PSRCloud"
_CONSOLE_REL_PARENT_PATH = r"Oper\Console"
_CONSOLE_APP = r"FakeConsole.exe"

_ALLOWED_PROGRAMS = ["SDDP", "OPTGEN", "PSRIO", "GRAF", "MyModel", "GNoMo"]

# Credentials file read as a fallback; only defined on Windows ("nt").
_PSRCLOUD_CREDENTIALS_PATH = (
    os.path.expandvars(os.path.join("%appdata%", "PSR", "PSRCloud", "EPSRConfig.xml"))
    if os.name == "nt"
    else ""
)

# Environment variables consulted for credentials and for the console path.
_PSRCLOUD_USER_ENV_VAR = "PSR_CLOUD_USER"
_PSRCLOUD_PASSWORD_HASH_ENV_VAR = "PSR_CLOUD_PASSWORD_HASH"  # nosec
_PSRCLOUD_CONSOLE_ENV_VAR = "PSR_CLOUD_CONSOLE_PATH"

_auth_error_message = f"Please set {_PSRCLOUD_USER_ENV_VAR} and {_PSRCLOUD_PASSWORD_HASH_ENV_VAR} environment variables."

# FIXME uninspired name
# Default look-back window, in days, used by Client.get_all_cases_since.
_DEFAULT_GET_CASES_SINCE_DAYS = 7


# Cluster selected when none is requested; also used to list the other clusters.
_DEFAULT_CLUSTER = {
    "name": "PSR-US",
    "pretty_name": "External",
    "url": "https://psrcloud.psr-inc.com/CamadaGerenciadoraServicoWeb/DespachanteWS.asmx",
}
127
-
128
-
129
- class Client:
130
def __init__(self, **kwargs) -> None:
    """
    Create a PSR Cloud client.

    Recognized keyword arguments read here: ``cluster``, ``import_desktop``,
    ``debug``, ``timeout``, ``python_client``, ``application_version``,
    ``quiet`` and ``verbose``. All keyword arguments are also forwarded to the
    console-path and credentials setup steps.
    """
    option = kwargs.get

    self.cwd = Path.cwd()

    # Caches (avoiding multiple soap requests)
    self._cloud_version_xml_cache = None
    self._cloud_clusters_xml_cache = None
    self._instance_type_map = None

    # Options
    self._selected_cluster = option("cluster", _DEFAULT_CLUSTER["pretty_name"])
    self._import_desktop = option("import_desktop", True)
    self._debug_mode = option("debug", False)
    self._timeout = option("timeout", None)
    self._python_client = option("python_client", False)

    # Client version
    self.application_version = option("application_version", None)

    # Logging setup: debug mode overrides both quiet and verbose.
    self._quiet = option("quiet", False)
    self._verbose = option("verbose", False)
    if self._debug_mode:
        self._quiet, self._verbose = False, True

    log_id = id(self)
    self._logger = get_logger(log_id, quiet=self._quiet, debug_mode=self._debug_mode)
    self._logger.info(f"Client uid {log_id} initialized.")

    if not self._python_client:
        self._console_path_setup(**kwargs)
    else:
        self._logger.info(
            "Using Python client for PSR Cloud. Some features may not be available."
        )

    self._credentials_setup(**kwargs)
    self._cluster_setup(self._selected_cluster)
171
-
172
def _console_path_setup(self, **kwargs) -> None:
    """
    Locate the PSR Cloud console executable and store it in ``self._console_path``.

    Sources are tried in this order: the ``psrcloud_path`` keyword (install
    directory), the ``fakeconsole_path`` keyword (full executable path), the
    console-path environment variable, and finally the default install location.

    :raises CloudError: If the selected path does not exist.
    """
    if "psrcloud_path" in kwargs:
        # For common users - provide PSR Cloud install path
        install_dir = Path(kwargs["psrcloud_path"])
        self._console_path = install_dir / _CONSOLE_REL_PARENT_PATH / _CONSOLE_APP
        origin_note = "Provided psrcloud_path: " + str(install_dir)
    elif "fakeconsole_path" in kwargs:
        # For advanced users or tests - provide full FakeConsole.exe path.
        self._console_path = Path(kwargs["fakeconsole_path"])
        origin_note = "Provided fakeconsole_path: " + str(self._console_path)
    elif _PSRCLOUD_CONSOLE_ENV_VAR in os.environ:
        # For advanced users or tests - provide PSR Cloud console path as environment variable.
        self._console_path = Path(os.environ[_PSRCLOUD_CONSOLE_ENV_VAR]).resolve()
        origin_note = "Provided console path: " + str(self._console_path)
    else:
        self._console_path = (
            Path(_PSRCLOUD_PATH) / _CONSOLE_REL_PARENT_PATH / _CONSOLE_APP
        )
        origin_note = "Using default console path."

    # One existence check shared by every source of the path.
    if not os.path.exists(self._console_path):
        err_msg = (
            f"PSR Cloud application not found at {self._console_path} "
            f"Make sure the path is correct and PSR Cloud is installed."
        )
        self._logger.error(err_msg)
        self._logger.info(origin_note)
        raise CloudError(err_msg)

    self._logger.info(f"PSR Cloud console path: {self._console_path}")
    self._logger.info(f"PSR Cloud console version: {self._get_console_version()}")
224
-
225
def _credentials_setup(self, **kwargs) -> None:
    """
    Resolve the username and password hash used for PSR Cloud requests.

    Sources are tried in this order: the ``username``/``password`` keyword
    arguments (the password is hashed here), the user and password-hash
    environment variables, and the credentials file at
    ``_PSRCLOUD_CREDENTIALS_PATH``.

    :raises CloudInputError: If no complete set of credentials is found, or if
        ``username`` is given without ``password``.
    """
    self.username = kwargs.get("username", None)
    self.__password = None

    if self.username is not None:
        password = kwargs.get("password")
        if password is None:
            # Previously this surfaced as a bare KeyError on kwargs["password"].
            err_msg = "Username provided without a password. " + _auth_error_message
            self._logger.error(err_msg)
            raise CloudInputError(err_msg)
        self.__password = hash_password(password)
        self._logger.info(
            "Using provided credentials from PSR Cloud console arguments."
        )
        self._logger.warning(
            "For security reasons, it is highly recommended to use environment variables to store your credentials.\n"
            + f"({_PSRCLOUD_USER_ENV_VAR}, {_PSRCLOUD_PASSWORD_HASH_ENV_VAR})"
        )
    elif (
        _PSRCLOUD_USER_ENV_VAR in os.environ
        and _PSRCLOUD_PASSWORD_HASH_ENV_VAR in os.environ
    ):
        self.username = os.environ[_PSRCLOUD_USER_ENV_VAR]
        # The variable already holds the hash; it is only normalized to upper case.
        self.__password = os.environ[_PSRCLOUD_PASSWORD_HASH_ENV_VAR].upper()
        self._logger.info("Using credentials from environment variables")
    elif os.path.exists(_PSRCLOUD_CREDENTIALS_PATH):
        self._logger.info("Environment variables for Cloud credentials not found")
        xml = ET.parse(
            _PSRCLOUD_CREDENTIALS_PATH, parser=ET.XMLParser(encoding="utf-16")
        )
        # Only the first "Aplicacao" element is consulted.
        first_app = next(xml.getroot().iter("Aplicacao"), None)
        username = None if first_app is None else first_app.attrib.get("SrvUsuario")
        _password = None if first_app is None else first_app.attrib.get("SrvSenha")
        if username is None or _password is None:
            err_msg = "Credentials not provided. " + _auth_error_message
            self._logger.info(
                "Loading credentials from file: " + _PSRCLOUD_CREDENTIALS_PATH
            )
            self._logger.error(err_msg)
            raise CloudInputError(err_msg)
        self.username = username
        self.__password = _password
        self._logger.info("Using credentials from PSR Cloud Desktop cache")
    else:
        # A space was missing between the two sentences of this message.
        err_msg = "Username and password not provided. " + _auth_error_message
        self._logger.info("Trying to get credentials from environment variables.")
        self._logger.error(err_msg)
        raise CloudInputError(err_msg)

    self._logger.info(f"Logged as {self.username}")
278
-
279
def _cluster_setup(self, cluster_str: str) -> None:
    """
    Select the cluster matching ``cluster_str`` and store it in ``self.cluster``.

    The default cluster is matched first without any request. Otherwise the
    clusters available to the user are fetched and compared by name or pretty
    name, ignoring case.

    :raises CloudInputError: If no cluster matches.
    """
    wanted_name = cluster_str.upper()
    wanted_pretty = cluster_str.capitalize()

    def is_requested(candidate) -> bool:
        return (
            candidate["name"].upper() == wanted_name
            or candidate["pretty_name"].capitalize() == wanted_pretty
        )

    if is_requested(_DEFAULT_CLUSTER):
        self.cluster = _DEFAULT_CLUSTER
    else:
        self.cluster = None
        # No early exit: when several entries match, the last one is kept.
        for candidate in self._get_clusters_by_user():
            if is_requested(candidate):
                self.cluster = candidate

    if self.cluster is None:
        raise CloudInputError(f"Cluster {cluster_str} not found")

    self._logger.info(
        f"Running on Cluster {self.cluster['name']} ({self.cluster['pretty_name']})"
    )
307
-
308
def set_cluster(self, cluster_str: str) -> None:
    """Switch to the cluster named ``cluster_str`` and drop the cached responses."""
    self._cluster_setup(cluster_str)
    # Clear caches
    for cache_attr in (
        "_cloud_version_xml_cache",
        "_cloud_clusters_xml_cache",
        "_instance_type_map",
    ):
        setattr(self, cache_attr, None)
314
-
315
def _get_console_path(self) -> Path:
    """Return the console executable path stored in ``self._console_path``."""
    console_path = self._console_path
    return console_path
317
-
318
def _get_console_parent_path(self) -> Path:
    """Return the directory that contains the console executable."""
    console_dir = self._console_path.parent
    return console_dir
320
-
321
def _get_console_version(self) -> Optional[str]:
    """
    Read the ``ProductVersion`` string from the console executable.

    :return: The decoded product version, or None when the executable carries
        no such version string.
    """
    pe = pefile.PE(self._get_console_path())
    try:
        for file_info in getattr(pe, "FileInfo", []):
            for entry in file_info:
                for st in getattr(entry, "StringTable", []):
                    product_version = st.entries.get(b"ProductVersion")
                    if product_version:
                        return product_version.decode()
        return None
    finally:
        # Release the handle pefile keeps on the executable.
        pe.close()
330
-
331
@staticmethod
def _check_xml(xml_content: str) -> None:
    """
    Verify that ``xml_content`` is well-formed XML.

    :param xml_content: XML text about to be handed to the console.
    :raises CloudInputError: If the text cannot be parsed. The message embeds
        the XML with the password parameter masked.
    """
    try:
        ET.fromstring(xml_content)
    except ET.ParseError as err:
        # The masked text must be what goes into the message; the original
        # discarded the result of _hide_password and leaked the password.
        safe_xml = _hide_password(xml_content)
        raise CloudInputError(
            f"Invalid XML content.\n"
            f"Contact PSR support at psrcloud@psr-inc.com with following data:\n\n{safe_xml}\n\n"
        ) from err
341
-
342
def _get_clusters_by_user(self) -> list:
    """
    List the clusters available to the current user.

    The request is always sent through the default cluster; ``self.cluster`` is
    swapped temporarily and restored afterwards, whether or not the request
    succeeds.

    :return: A list of dicts with the keys "name", "pretty_name" and "url".
    """
    # Captured before the try block so the restore below can never hit an
    # unbound name.
    previous_cluster = self.cluster
    self.cluster = _DEFAULT_CLUSTER
    try:
        xml = self._make_soap_request("listarCluster", "listaCluster")

        clusters = []
        for cluster in xml.findall("Cluster"):
            nome = cluster.attrib.get("nome")
            url = cluster.attrib.get("urlServico") + "/DespachanteWS.asmx"
            # Fall back to the plain name when no display name is given.
            pretty_name = cluster.attrib.get("legenda", nome)
            clusters.append({"name": nome, "pretty_name": pretty_name, "url": url})
    finally:
        self.cluster = previous_cluster
    return clusters
360
-
361
def get_clusters(self) -> List[str]:
    """Return the pretty names of the clusters available to the current user."""
    pretty_names = []
    for entry in self._get_clusters_by_user():
        pretty_names.append(entry["pretty_name"])
    return pretty_names
364
-
365
def _run_console(self, xml_content: str) -> None:
    """
    Run the PSR Cloud console executable with the given XML request.

    The XML is validated, written to a temporary file under ``self.cwd`` and
    passed to the console as its only argument. Console output is forwarded to
    the client logger according to the quiet/verbose/debug flags.

    :param xml_content: XML request handed to the console.
    :raises CloudInputError: If ``xml_content`` is not well-formed XML.
    :raises CloudError: If the console exits with a non-zero return code.
    """
    self._check_xml(xml_content)
    delete_xml = not self._debug_mode  # the request file is kept in debug mode
    with CreateTempFile(
        str(self.cwd), "psr_cloud_", xml_content, delete_xml
    ) as xml_file:
        xml_file.close()
        command = [self._get_console_path(), xml_file.name]
        command_str = " ".join(map(str, command))
        self._logger.debug(f"Running console command {command_str}")

        # Decide which console streams are captured and forwarded to the log.
        if self._verbose:
            proc_stdout = proc_stderr = subprocess.PIPE
        elif self._quiet:
            proc_stdout = proc_stderr = (
                subprocess.PIPE if self._debug_mode else None
            )
        else:
            proc_stdout, proc_stderr = subprocess.PIPE, None

        try:
            process = subprocess.Popen(
                command, stdout=proc_stdout, stderr=proc_stderr, shell=False
            )
            enable_log_timestamp(self._logger, False)
            try:
                # NOTE(review): stdout is drained completely before stderr; a
                # console that writes a lot to stderr could block - confirm.
                if proc_stdout is not None:
                    forward = (
                        self._logger.info if self._verbose else self._logger.debug
                    )
                    with process.stdout:
                        for line in iter(process.stdout.readline, b""):
                            # Undecodable bytes are replaced, not raised on.
                            forward(line.decode(errors="replace").strip())
                if proc_stderr is not None:
                    with process.stderr:
                        for line in iter(process.stderr.readline, b""):
                            self._logger.error(
                                line.decode(errors="replace").strip()
                            )
            finally:
                # Always restore timestamps, even if forwarding output failed.
                enable_log_timestamp(self._logger, True)
            result = process.wait(timeout=self._timeout)

            if result != 0:
                err_msg = (
                    f"PSR Cloud console command failed with return code {result}"
                )
                self._logger.error(err_msg)
                raise CloudError(err_msg)
        except subprocess.CalledProcessError as e:
            err_msg = f"PSR Cloud console command failed with exception: {str(e)}"
            self._logger.error(err_msg)
            raise CloudError(err_msg)
415
-
416
def _validate_case(self, case: "Case") -> "Case":
    """
    Validate a case against the options offered by the cloud and normalize it.

    String selections (program version, execution type, repository duration)
    are replaced by their ids, a missing ``parent_case_id`` becomes 0 and a
    partial budget name is replaced by the single matching budget.

    :param case: Case to validate; it is modified in place.
    :return: The same case object.
    :raises CloudInputError: If a required value is missing or not available.
    :raises CloudError: If a price optimized run is requested while no instance
        type offers it.
    """
    if not case.program:
        raise CloudInputError("Program not provided")
    programs = self.get_programs()
    if case.program not in programs:
        raise CloudInputError(
            f"Program {case.program} not found. Available programs are: {', '.join(programs)}"
        )

    if not case.memory_per_process_ratio:
        raise CloudInputError("Memory per process ratio not provided")
    ratios = self.get_memory_per_process_ratios()
    if case.memory_per_process_ratio not in ratios:
        # map(str, ...) keeps the message buildable if the ratios are not strings.
        raise CloudInputError(
            f"Memory per process ratio {case.memory_per_process_ratio} not found. Available ratios are: {', '.join(map(str, ratios))}"
        )

    if not 1 <= case.number_of_processes <= 512:
        raise CloudInputError("Number of processes must be between 1 and 512")

    if case.data_path and not Path(case.data_path).exists():
        raise CloudInputError("Data path does not exist")

    if case.parent_case_id is None:
        case.parent_case_id = 0

    def validate_selection(selection, available_options, selection_name, program_name):
        # available_options maps id -> display text. A string selection is
        # looked up by text and translated to its id; anything else must be an id.
        if selection is None:
            raise CloudInputError(
                f"{selection_name} of program {program_name} not provided"
            )
        if isinstance(selection, str):
            if selection not in available_options.values():
                raise CloudInputError(
                    f"{selection_name} {selection} of program {program_name} not found. Available {selection_name.lower()}s are: {', '.join(available_options.values())}"
                )
            return next(
                key for key, value in available_options.items() if value == selection
            )
        if selection not in available_options:
            raise CloudInputError(
                f"{selection_name} id {selection} of program {program_name} not found. Available {selection_name.lower()} ids are: {', '.join(map(str, available_options.keys()))}"
            )
        return selection

    program_versions = self.get_program_versions(case.program)
    # Keep what the caller passed before it is replaced by the id.
    case.program_version_name = case.program_version
    case.program_version = validate_selection(
        case.program_version, program_versions, "Version", case.program
    )

    execution_types = self.get_execution_types(case.program, case.program_version)
    case.execution_type = validate_selection(
        case.execution_type, execution_types, "Execution type", case.program
    )

    instance_type_map = self._get_instance_type_map()
    # The equality comparisons are kept on purpose: the flag types are not
    # visible here.
    is_spot_disabled = all(
        value[1] == False for value in instance_type_map.values()  # noqa: E712
    )
    if case.price_optimized == True and is_spot_disabled:  # noqa: E712
        raise CloudError("Price Optimized is temporarily unavailable.")

    repository_durations = self.get_repository_durations()
    case.repository_duration = validate_selection(
        case.repository_duration,
        repository_durations,
        "Repository duration",
        case.program,
    )

    if case.budget:
        budgets = self.get_budgets()
        match_list = _budget_matches_list(case.budget, budgets)
        if len(match_list) == 0:
            raise CloudInputError(
                f'Budget "{case.budget}" not found. Get a list of available budgets using Client().get_budgets().'
            )
        if len(match_list) > 1:
            # The original relied on implicit string concatenation, which made
            # the message the join separator instead of a header.
            options = "\n".join(f' - "{budget}"' for budget in match_list)
            raise CloudInputError(
                f'Multiple budgets found for "{case.budget}". Please use the budget id instead of the name.\n'
                + options
            )
        # Replace partial with complete budget name
        case.budget = match_list[0]

    # MyModel
    if case.program == "MyModel":
        if case.mymodel_program_files is None:
            raise CloudInputError("MyModel program files not provided")
    elif case.mymodel_program_files is not None:
        msg = "Ignoring mymodel_program_files parameter for non MyModel case."
        warnings.warn(msg)
    return case
516
-
517
def _pre_process_graph(self, path: str, case_id: int) -> None:
    """
    Send a Graf "PreProcessamento" request to the console for a case.

    Only used for testing the graf cloud execution; error handling is done in
    the tests module.

    :param path: Sent as the "diretorioDestino" parameter.
    :param case_id: Sent as the "repositorioId" parameter.
    """
    cluster = self.cluster
    request = dict(
        urlServico=cluster["url"],
        usuario=self.username,
        senha=self.__password,
        idioma="3",
        modelo="Graf",
        comando="PreProcessamento",
        cluster=cluster["name"],
        repositorioId=str(case_id),
        diretorioDestino=path,
        tipoExecucao="1",
    )
    self._run_console(create_case_xml(request))
535
-
536
def _check_until_status(
    self, case_id: int, requested_status: "ExecutionStatus", timeout: int = 60 * 60
) -> bool:
    """
    Check the status of a case until the requested status is reached or timeout occurs.

    Polling also stops on success or on any faulty termination status. The
    client's quiet/verbose/debug flags are overridden while polling and
    restored afterwards.

    :param case_id: The ID of the case to check.
    :param requested_status: The status to wait for.
    :param timeout: The maximum time to wait in seconds (default is 3600 seconds or 1 hour).
    :return: True if the requested status is reached, False if timeout occurs
        or polling stopped on a different final status.
    """
    final_statuses = FAULTY_TERMINATION_STATUS + [
        ExecutionStatus.SUCCESS,
        requested_status,
    ]
    status = None
    last_status = None
    start_time = time()
    saved_flags = (self._quiet, self._verbose, self._debug_mode)
    self._quiet, self._verbose, self._debug_mode = True, False, False
    try:
        while True:
            if time() - start_time > timeout:
                self._logger.error(
                    f"Timeout reached while waiting for status {requested_status}"
                )
                return False
            status, _ = self.get_status(case_id, quiet=True)
            if last_status != status:
                self._logger.info(f"Status: {STATUS_MAP_TEXT[status]}")
                last_status = status
            if status in final_statuses:
                # Return right away instead of sleeping once more.
                break
            sleep(20)
    finally:
        self._quiet, self._verbose, self._debug_mode = saved_flags

    return status == requested_status
575
-
576
def _clean_folder(self, folder):
    """
    Remove every file and sub-directory under ``folder``, keeping ``folder`` itself.

    The tree is walked bottom-up so each directory is already empty when it is
    removed.
    """
    for current, subdir_names, file_names in os.walk(folder, topdown=False):
        for file_name in file_names:
            os.remove(os.path.join(current, file_name))

        for subdir_name in subdir_names:
            os.rmdir(os.path.join(current, subdir_name))
585
-
586
@thread_safe()
def run_case(self, case: "Case", dry_run=False, **kwargs) -> Union[int, str]:
    """
    Validate a case and submit it for execution.

    :param case: Case to run; it is validated and normalized in place.
    :param dry_run: When true, nothing is submitted and the request XML is
        returned instead of a case id.
    :param kwargs: ``wait`` (default False) selects synchronous execution; it
        is forced to True for the "GRAF" program.
    :return: The id of the started case, or the request XML when ``dry_run``.
    :raises CloudInputError: If the case is invalid or no instance type matches
        the requested memory ratio and price option.
    :raises CloudError: If the console response carries an error or no case id.
    """
    self._validate_case(case)
    instance_type_map = self._get_instance_type_map()
    instance_type_id = next(
        (
            key
            for key, value in instance_type_map.items()
            if value[0] == case.memory_per_process_ratio
            and value[1] == case.price_optimized
        ),
        None,
    )
    if instance_type_id is None:
        # Previously a bare StopIteration escaped from next().
        raise CloudInputError(
            f"No instance type found for memory per process ratio "
            f"{case.memory_per_process_ratio} with price_optimized={case.price_optimized}"
        )
    case.data_path = _handle_relative_path(case.data_path)

    wait = True if case.program == "GRAF" else kwargs.get("wait", False)

    if self.application_version:
        interface_version = self.application_version + " - " + INTERFACE_VERSION
    else:
        interface_version = INTERFACE_VERSION

    parameters = {
        "urlServico": self.cluster["url"],
        "usuario": self.username,
        "senha": self.__password,
        "idioma": "3",
        "modelo": case.program,
        "comando": "executar",
        "cluster": self.cluster["name"],
        "diretorioDados": case.data_path,
        "origemDados": "LOCAL",
        "s3Dados": "",
        "nproc": case.number_of_processes,
        "repositorioId": "0",
        "instanciaTipo": instance_type_id,
        "validacaoModelo": "True",
        "validacaoUsuario": "False",
        "idVersao": case.program_version,
        "modeloVersao": case.program_version_name,
        "pathModelo": "C:\\PSR",
        "idTipoExecucao": case.execution_type,
        "nomeCaso": case.name,
        # "0" when waiting for the run to finish, "1" otherwise.
        "tipoExecucao": str(int(not wait)),
        "deveAgendar": "False",
        "userTag": "(Untagged)",
        "lifecycle": case.repository_duration,
        "versaoInterface": interface_version,
        "pathPrograma": case.mymodel_program_files,
        "arquivoSaida": case.mymodel_output_file,
    }

    # Several parents are sent as a comma separated list under a different key.
    if isinstance(case.parent_case_id, list):
        parameters["repositoriosPais"] = ",".join(map(str, case.parent_case_id))
    else:
        parameters["repositorioPai"] = case.parent_case_id

    if case.budget:
        parameters["budget"] = case.budget
    if case.upload_only is not None:
        parameters["saveInCloud"] = case.upload_only

    xml_content = create_case_xml(parameters)

    if dry_run:
        return xml_content

    if self._python_client:
        case_id = self._execute_case(parameters)
    else:
        self._run_console(xml_content)
        # The console writes its answer next to the executable.
        xml = ET.parse(
            f"{self._get_console_parent_path()}\\fake{case.program}_async.xml"
        )
        _check_for_errors(xml, self._logger)
        id_parameter = xml.find("./Parametro[@nome='repositorioId']")
        if id_parameter is None:
            xml_str = _xml_to_str(xml)
            raise CloudError(
                f"Case id not found on returned XML response.\n"
                f"Contact PSR support at psrcloud@psr-inc.com with following data:\n\n{xml_str}\n\n"
            )

        case_id = int(id_parameter.text)
    if not wait:
        self._logger.info(f"Case {case.name} started with id {case_id}")

    if self._import_desktop and case.program != "GRAF":
        # Best effort: a failed desktop import must not fail the submission.
        try:
            case_copy = copy.deepcopy(case)
            case_copy.id = case_id
            replace_case_str_values(self, case_copy)
            import_case(case_copy, self.cluster["name"], instance_type_id)
        except Exception as e:
            msg = f"Failed to import case {case.name} to desktop:\n{str(e)}"
            self._logger.error(msg)
            warnings.warn(msg)

    return case_id
684
-
685
def get_status(self, case_id: int, quiet=False) -> tuple["ExecutionStatus", str]:
    """
    Query the execution status of a case.

    :param case_id: Id of the case to query.
    :param quiet: When true, the status is not written to the log.
    :return: The status and its text from ``STATUS_MAP_TEXT``.
    :raises CloudError: If the response has no status or the status value is
        not recognized.
    """
    delete_xml = not self._debug_mode
    xml_content = ""
    # The temporary file only reserves a path for the console to write into.
    with CreateTempFile(
        str(self.cwd), "psr_cloud_status_", xml_content, delete_xml
    ) as xml_file:
        status_xml_path = os.path.abspath(xml_file.name)

    parameters = {
        "urlServico": self.cluster["url"],
        "usuario": self.username,
        "senha": self.__password,
        "idioma": "3",
        "idFila": str(case_id),
        "modelo": "SDDP",
        "comando": "obterstatusresultados",
        "arquivoSaida": status_xml_path,
    }

    xml = None
    if self._python_client:
        xml = self._get_status_python(parameters)
    else:
        run_xml_content = create_case_xml(parameters)
        self._run_console(run_xml_content)
        xml = ET.parse(status_xml_path)
    parameter_status = xml.find("./Parametro[@nome='statusExecucao']")
    if parameter_status is None:
        xml_str = _xml_to_str(xml)
        raise CloudError(
            f"Status not found on returned XML response.\n"
            f"Contact PSR support at psrcloud@psr-inc.com with following data:\n\n{xml_str}\n\n"
        )
    try:
        status = ExecutionStatus(int(parameter_status.text))
    except (TypeError, ValueError) as err:
        # int() raises these for missing or non-numeric text; presumably
        # ExecutionStatus is an Enum and raises ValueError for unknown codes.
        # The original caught CloudError, which this line cannot raise.
        xml_str = _xml_to_str(xml)
        raise CloudError(
            f"Unrecognized status on returned XML response.\n"
            f"Contact PSR support at psrcloud@psr-inc.com with following data:\n\n{xml_str}\n\n"
        ) from err

    if not quiet:
        self._logger.info(f"Status: {STATUS_MAP_TEXT[status]}")
    return status, STATUS_MAP_TEXT[status]
730
-
731
def list_download_files(self, case_id: int) -> List[dict]:
    """
    List the files available for download for a case.

    :param case_id: Id of the case; sent as the remote directory.
    :return: One dict per file with the keys "name", "filesize" and "filedate",
        taken from the attributes of each "Arquivo" element of the response.
    """
    request_arguments = {
        "cluster": self.cluster["name"],
        "filtro": "(.*)",
        "diretorioRemoto": str(case_id),
    }
    response = self._make_soap_request(
        "prepararListaArquivosRemotaDownload",
        "listaArquivoRemota",
        additional_arguments=request_arguments,
    )
    return [
        {
            "name": entry.attrib.get("nome"),
            "filesize": entry.attrib.get("filesize"),
            "filedate": entry.attrib.get("filedate"),
        }
        for entry in response.findall("Arquivo")
    ]
753
-
754
def download_results(
    self,
    case_id: int,
    output_path: Union[str, Path],
    files: Optional[List[str]] = None,
    extensions: Optional[List[str]] = None,
) -> None:
    """
    Download result files of a case into ``output_path``.

    :param case_id: Id of the case whose results are downloaded.
    :param output_path: Destination directory; created if missing. A relative
        path is made absolute first.
    :param files: File names added to the download filter.
    :param extensions: Extensions added to the filter as ``*.ext`` masks. When
        neither ``files`` nor ``extensions`` is given, a default list is used.
    """
    if not (extensions or files):
        extensions = ["csv", "log", "hdr", "bin", "out", "ok"]

    masks = []
    if extensions:
        Client._validate_extensions(extensions)
        masks += [f"*.{ext}" for ext in extensions]
    if files:
        masks += files
    download_mask = "|".join(masks)

    self._logger.info("Download filter: " + download_mask)
    case = self.get_case(case_id)
    output_path = _handle_relative_path(output_path)
    parameters = {
        "urlServico": self.cluster["url"],
        "usuario": self.username,
        "senha": self.__password,
        "idioma": "3",
        "_cluster": self.cluster["name"],
        "modelo": case.program,
        "comando": "download",
        "diretorioDestino": output_path,
        "repositorioId": str(case_id),
        "filtroDownloadPorMascara": download_mask,
    }

    os.makedirs(output_path, exist_ok=True)

    if not self._python_client:
        # Download results using Console
        self._run_console(create_case_xml(parameters))
    else:
        self._download_results_python(parameters)  ## Not implemented yet
    self._logger.info(f"Results downloaded to {output_path}")
802
-
803
def cancel_case(self, case_id: int, wait: bool = False) -> bool:
    """
    Request the cancellation of a case.

    :param case_id: Id of the case to cancel.
    :param wait: When true, poll for up to 10 minutes until the case reports
        the cancelled status.
    :return: True when the request was sent and ``wait`` is false, or when the
        cancellation was confirmed; False when it was not confirmed.
    """
    parameters = {
        "urlServico": self.cluster["url"],
        "usuario": self.username,
        "senha": self.__password,
        "idioma": "3",
        "modelo": "SDDP",
        "comando": "cancelarfila",
        "cancelamentoForcado": "False",
        "idFila": str(case_id),
    }

    if not self._python_client:
        # Cancel case using Console
        self._run_console(create_case_xml(parameters))
    else:
        self._cancel_case_python(case_id, parameters)
    self._logger.info(f"Request to cancel case {case_id} was sent")

    if not wait:
        return True

    self._logger.info(f"Waiting for case {case_id} to be canceled")
    canceled = self._check_until_status(
        case_id, ExecutionStatus.CANCELLED, timeout=60 * 10
    )
    if canceled:
        self._logger.info(f"Case {case_id} was successfully canceled")
        return True
    self._logger.error(f"Failed to cancel case {case_id}")
    return False
835
-
836
def _cases_from_xml(self, xml: ET.Element) -> List["Case"]:
    """
    Build Case objects from the "Fila" elements of a queue listing.

    Entries whose attributes cannot be converted are skipped and reported at
    debug level.

    :param xml: Element whose "Fila" children describe the cases.
    :return: Cases sorted by execution date, most recent first.
    """
    instance_type_map = self._get_instance_type_map()
    # Fallback ratio for entries whose instance type is unknown.
    smallest_ratio = min([value[0] for value in instance_type_map.values()])

    cases = []
    for fila in xml.findall("Fila"):
        attrs = fila.attrib
        try:
            # NOTE(review): the flag format is not visible here. Explicit
            # "false"/"0" now count as False (bool("False") was True before);
            # any other non-empty value stays True - confirm against the service.
            raw_spot = attrs.get("flagSpot")
            price_optimized = bool(raw_spot) and raw_spot.strip().lower() not in (
                "false",
                "0",
            )

            # The map is indexed with int(...) below, so membership must be
            # tested with the same type; the original tested the raw string.
            raw_instance = attrs.get("instanciaTipo")
            instance_key = int(raw_instance) if raw_instance else None
            if instance_key in instance_type_map:
                memory_ratio = instance_type_map[instance_key][0]
            else:
                memory_ratio = smallest_ratio

            case = Case(
                name=attrs.get("nomeCaso"),
                data_path=None,
                program=attrs.get("programa"),
                program_version=int(attrs.get("idVersao")),
                execution_type=int(attrs.get("idTipoExecucao")),
                price_optimized=price_optimized,
                number_of_processes=int(attrs.get("numeroProcesso")),
                id=int(attrs.get("repositorioId")),
                user=attrs.get("usuario"),
                execution_date=datetime.strptime(
                    attrs.get("dataInicio"), "%d/%m/%Y %H:%M"
                ),
                parent_case_id=int(attrs.get("repositorioPai"))
                if attrs.get("repositorioPai")
                else 0,
                memory_per_process_ratio=memory_ratio,
                repository_duration=int(attrs.get("duracaoRepositorio")),
                budget=attrs.get("budget"),
            )
        except (TypeError, ValueError) as err:
            # Still best-effort, but no longer silent.
            self._logger.debug(
                f"Skipping case {attrs.get('repositorioId')}: {err}"
            )
            continue
        cases.append(case)

    # sort cases by execution date desc
    cases.sort(key=lambda x: x.execution_date, reverse=True)
    return cases
875
-
876
def get_all_cases_since(
    self, since: Union[int, datetime] = _DEFAULT_GET_CASES_SINCE_DAYS
) -> List["Case"]:
    """
    List the cases started since a given moment.

    :param since: Number of days to look back from now, or the datetime to
        start from.
    :return: Cases sorted by execution date, most recent first.
    """
    initial_date = (
        datetime.now() - timedelta(days=since) if isinstance(since, int) else since
    )
    # One format for both branches. Slicing isoformat() by position cut into
    # the seconds whenever the microsecond part happened to be zero.
    initial_date_iso = initial_date.strftime("%Y-%m-%d %H:%M:%S")

    xml = self._make_soap_request(
        "listarFila",
        "dados",
        additional_arguments={"dataInicial": initial_date_iso},
    )

    return self._cases_from_xml(xml)
892
-
893
def get_case(self, case_id: int) -> "Case":
    """
    Fetch a single case by id.

    :raises CloudInputError: If the listing returns no case for ``case_id``.
    """
    matches = self.get_cases([case_id])
    if not matches:
        raise CloudInputError(f"Case {case_id} not found")
    return matches[0]
898
-
899
def get_cases(self, case_ids: List[int]) -> List["Case"]:
    """
    Fetch the cases matching a list of IDs.

    :param case_ids: IDs to query; they are sent to the "listarFila" service
        as a single comma-separated string.
    :return: The cases built by ``_cases_from_xml`` from the response.
    """
    joined_ids = ",".join(str(case_id) for case_id in case_ids)
    request_arguments = {"listaRepositorio": joined_ids}
    response = self._make_soap_request(
        "listarFila", "dados", additional_arguments=request_arguments
    )
    return self._cases_from_xml(response)
907
-
908
def get_budgets(self) -> list:
    """
    List the budget names of the current cluster, sorted alphabetically.

    The "listarCluster" service is queried and the first ``<Cluster>`` whose
    ``nome`` matches ``self.cluster["name"]`` (case-insensitively) is used.

    :return: Sorted budget names; an empty list when the cluster is not
        listed or has no ``<ColecaoBudget>`` element.
    """
    xml = self._make_soap_request("listarCluster", "listaCluster")
    wanted_name = self.cluster["name"].lower()

    for cluster in xml.findall("Cluster"):
        cluster_name = cluster.attrib.get("nome")
        # A <Cluster> without a "nome" attribute can never match; skip it
        # instead of failing on ``None.lower()``.
        if cluster_name is None or cluster_name.lower() != wanted_name:
            continue

        collection = cluster.find("ColecaoBudget")
        if collection is None:
            return []

        names = (budget.attrib.get("nome") for budget in collection.findall("Budget"))
        # Unnamed budgets are dropped: None cannot be ordered against str.
        return sorted(name for name in names if name is not None)

    return []
924
-
925
def get_number_of_processes(self, programa_nome):
    """
    List the process counts selectable for a program on the current cluster.

    The counts are the multiples of the cluster's ``processosPorMaquina``
    attribute up to and including its ``maximoProcessos`` attribute.

    :param programa_nome: Value of the ``nome`` attribute of the program.
    :return: List of integers.
    :raises CloudError: If the program or the cluster entry is not found, or
        if either attribute is missing or empty.
    """
    versions_root = self._get_cloud_versions_xml()

    program_node = versions_root.find(f".//Programa[@nome='{programa_nome}']")
    if program_node is None:
        raise CloudError(f"Programa '{programa_nome}' não encontrado.")

    cluster_node = program_node.find(f".//Cluster[@nome='{self.cluster['name']}']")
    if cluster_node is None:
        raise CloudError(
            f"Cluster '{self.cluster['name']}' não encontrado no programa '{programa_nome}'."
        )

    max_processes_attr = cluster_node.get("maximoProcessos")
    per_machine_attr = cluster_node.get("processosPorMaquina")
    if not (max_processes_attr and per_machine_attr):
        raise CloudError(f"Invalid values for cluster '{self.cluster['name']}'.")

    upper_bound = int(max_processes_attr)
    step = int(per_machine_attr)
    return list(range(step, upper_bound + 1, step))
954
-
955
def _make_soap_request(self, service: str, name: str = "", **kwargs) -> ET.Element:
    """
    Send a request to the PSR Cloud SOAP endpoint and parse the XML reply.

    :param service: Service name passed to ``despacharServico``.
    :param name: When non-empty, the reply's children are searched for one
        whose ``nome`` attribute equals this value, and that child's text is
        parsed as XML and returned instead of the whole reply.
    :param kwargs: Only ``additional_arguments`` is read: a dict merged into
        the request parameters (overriding the defaults on key clashes).
    :return: The parsed reply, or the parsed named child (see ``name``).
    :raises CloudError: If zeep reports a SOAP fault.
    :raises ValueError: If ``name`` is given but no child carries it.
    """
    # A new zeep client (and WSDL fetch) is created for every request.
    portal_ws = zeep.Client(self.cluster["url"] + "?WSDL")
    section = str(id(self))
    password_md5 = self.__password.upper()
    additional_arguments = kwargs.get("additional_arguments", None)
    parameters = {
        "sessao_id": section,
        "tipo_autenticacao": "bcrypt",
        "idioma": "3",
        "versao_cliente": self._get_console_version().split("-")[0]
        if not self._python_client
        else "5.4.0",
    }
    if additional_arguments:
        parameters.update(additional_arguments)

    xml_input = create_case_xml(parameters)

    try:
        xml_output_str = portal_ws.service.despacharServico(
            service, self.username, password_md5, xml_input
        )
    except zeep.exceptions.Fault as e:
        # Log the full exception details
        self._logger.error(f"Zeep Fault: {str(e)}")
        # Chain the original fault so the root cause stays in the traceback.
        raise CloudError(
            "Failed to connect to PSR Cloud service. Contact PSR support: psrcloud@psr-inc.com"
        ) from e

    # Remove control characters - this is a thing
    xml_output_str = xml_output_str.replace("&amp;#x1F;", "")

    xml_output = ET.fromstring(xml_output_str)
    if not name:
        return xml_output

    for child in xml_output:
        if child.attrib.get("nome") == name:
            # The named child carries a nested XML document as its text.
            return ET.fromstring(child.text)

    raise ValueError(
        f"Invalid XML response from PSR Cloud: {xml_output_str}. Please contact PSR support at psrcloud@psr-inc.com"
    )
998
-
999
def _get_cloud_versions_xml(self) -> ET.Element:
    """
    Return the "obterVersoes" reply, requesting it only on first use.

    The reply is kept in ``self._cloud_version_xml_cache`` and reused by
    later calls.
    """
    if self._cloud_version_xml_cache is None:
        self._cloud_version_xml_cache = self._make_soap_request("obterVersoes", "dados")
    return self._cloud_version_xml_cache
1004
-
1005
def _get_cloud_clusters_xml(self) -> ET.Element:
    """
    Return the "listarCluster" reply, requesting it only on first use.

    The reply is kept in ``self._cloud_clusters_xml_cache`` and reused by
    later calls.
    """
    if self._cloud_clusters_xml_cache is None:
        self._cloud_clusters_xml_cache = self._make_soap_request(
            "listarCluster", "listaCluster"
        )
    return self._cloud_clusters_xml_cache
1012
-
1013
def _execute_case(self, case_dict) -> int:
    """
    Execute a case on the PSR Cloud.

    Steps, in order: request the upload filter ("obterFiltros"), create the
    remote repository ("criarRepositorio"), select the local files matching
    the filter, write one checksum file per selected file under
    ``<diretorioDados>/.metadata``, upload the files with
    ``upload_case_to_s3``, then call "finalizarUpload" and
    "enfileirarProcesso".

    ``case_dict`` is modified in place: "programa", "numeroProcessos" and
    "versao_cliente" are set, and every named ``<Parametro>`` returned by
    "criarRepositorio" is copied into it.

    NOTE(review): the annotation says ``int``, but the success path returns
    ``repository_id.text`` and the no-files path returns ``None`` - confirm
    what callers expect.

    :param case_dict: Dictionary containing the case parameters. The keys
        "modelo", "nproc" and "diretorioDados" are read here.
    :return: Case ID of the executed case.
    """
    case_dict["programa"] = case_dict["modelo"]
    case_dict["numeroProcessos"] = case_dict["nproc"]
    case_dict["versao_cliente"] = "5.4.0"
    filter_request_result = self._make_soap_request(
        "obterFiltros", additional_arguments=case_dict
    )
    upload_filter = filter_request_result.find(
        "./Parametro[@nome='filtroUpload']"
    ).text
    # Wrap the server-provided filter so it is anchored at both ends and may
    # be preceded by any run of letters, digits, '.', '/' or '_'.
    upload_filter = "^[a-zA-Z0-9./_]*(" + upload_filter + ")$"

    # Create Repository
    # NOTE(review): the repository is created before the local file list is
    # checked, so the early return below leaves it created with no upload.
    self._logger.info("Creating remote repository")
    repository_request_result = self._make_soap_request(
        "criarRepositorio", additional_arguments=case_dict
    )

    # Add all parameters from the XML response to case_dict
    # Iterates over each <Parametro> element in the XML response,
    # extracts the 'nome' attribute for the key and the element's text for the value,
    # then adds this key-value pair to case_dict.
    for parametro_element in repository_request_result.findall("./Parametro"):
        param_name = parametro_element.get("nome")
        param_value = (
            parametro_element.text
        )  # This will be None if the element has no text.
        if param_name:  # Ensure the parameter has a name before adding.
            case_dict[param_name] = param_value

    # Each lookup below yields an Element, or None when the parameter is
    # absent from the response.
    repository_id = repository_request_result.find(
        "./Parametro[@nome='repositorioId']"
    )
    cloud_access = repository_request_result.find(
        "./Parametro[@nome='cloudAccess']"
    )
    cloud_secret = repository_request_result.find(
        "./Parametro[@nome='cloudSecret']"
    )
    cloud_session_token = repository_request_result.find(
        "./Parametro[@nome='cloudSessionToken']"
    )
    cloud_aws_url = repository_request_result.find("./Parametro[@nome='cloudUrl']")
    bucket_name = repository_request_result.find(
        "./Parametro[@nome='diretorioBase']"
    )

    # repository_id is dereferenced without a None check, unlike the
    # credential elements passed to upload_case_to_s3 further down.
    self._logger.info(f"Remote repository created with ID {repository_id.text}")
    case_dict["repositorioId"] = repository_id.text

    # Filtering files to upload
    self._logger.info("Checking list of files to send")

    file_list = _filter_upload_files(case_dict["diretorioDados"], upload_filter)

    if not file_list:
        self._logger.warning(
            "No files found to upload. Please check the upload filter."
        )
        return

    # generating .metadata folder with checksum for each file
    checksum_dictionary = {}
    metadata_folder = Path(case_dict["diretorioDados"]) / ".metadata"
    metadata_folder.mkdir(parents=True, exist_ok=True)
    for file_path in file_list:
        file_path = Path(file_path)
        if not file_path.is_absolute():
            file_path = Path(case_dict["diretorioDados"]) / file_path
        if not file_path.exists():
            self._logger.warning(f"File {file_path} does not exist. Skipping.")
            continue
        with open(file_path, "rb") as f:
            # `enconding` (sic) is the keyword as spelled at this call site;
            # presumably it matches `_md5sum`'s signature - verify before renaming.
            checksum = _md5sum(f.read(), enconding=False).upper()
            # Checksums are keyed by bare file name, not by full path.
            checksum_dictionary[file_path.name] = checksum
        metadata_file = metadata_folder / (file_path.name)
        with open(metadata_file, "w") as f:
            f.write(checksum)

    self._logger.info(
        f"Uploading list of files to remote repository {repository_id.text}"
    )

    # Uploading files to S3
    upload_case_to_s3(
        files=file_list,
        repository_id=repository_id.text,
        cluster_name=self.cluster["name"],
        checksums=checksum_dictionary,
        access=cloud_access.text if cloud_access is not None else None,
        secret=cloud_secret.text if cloud_secret is not None else None,
        session_token=cloud_session_token.text
        if cloud_session_token is not None
        else None,
        bucket_name=bucket_name.text if bucket_name is not None else None,
        url=cloud_aws_url.text if cloud_aws_url is not None else None,
        zip_compress=True,
    )

    # Tell the service the upload is complete, then enqueue the case.
    self._make_soap_request(
        "finalizarUpload",
        additional_arguments={"repositorioId": repository_id.text},
    )
    self._logger.info("Files uploaded successfully. Enqueuing case.")
    self._make_soap_request("enfileirarProcesso", additional_arguments=case_dict)

    return repository_id.text
1125
-
1126
def _get_status_python(self, case_dict: dict) -> ET.Element:
    """
    Query the "obterStatusResultados" service for a case.

    The ``status`` parameter of the reply, when present, is relabelled to
    ``statusExecucao``. When ``self._verbose`` is set and the reply carries a
    ``resultado`` parameter, its text is logged at INFO level.

    :param case_dict: Dictionary containing the case parameters.
    :return: XML Element with the status information.
    :raises CloudError: If anything fails while requesting or processing.
    """
    try:
        status_xml = self._make_soap_request(
            "obterStatusResultados", additional_arguments=case_dict
        )

        # Relabel "status" as "statusExecucao", the name used by the current
        # PSR Cloud.
        legacy_status = status_xml.find("./Parametro[@nome='status']")
        if legacy_status is not None:
            legacy_status.set("nome", "statusExecucao")

        execution_log = status_xml.find("./Parametro[@nome='resultado']")
        if self._verbose:
            if execution_log is not None:
                self._logger.info(execution_log.text)
        return status_xml
    except Exception as e:
        reason = str(e)
        self._logger.error(f"Error getting status: {reason}")
        raise CloudError(f"Failed to get status: {reason}")
1149
-
1150
def _cancel_case_python(self, case_id: int, xml_content: str) -> None:
    """
    Ask the "cancelarFila" service to cancel a case.

    :param case_id: The ID of the case to cancel; sent as a string.
    :param xml_content: XML content for the cancel request. Not read by
        this implementation.
    :raises CloudError: If the request fails for any reason.
    """
    cancel_arguments = {"idFila": str(case_id)}
    try:
        self._make_soap_request("cancelarFila", additional_arguments=cancel_arguments)
    except Exception as e:
        reason = str(e)
        self._logger.error(f"Error cancelling case: {reason}")
        raise CloudError(f"Failed to cancel case: {reason}")
1163
-
1164
def _download_results_python(self, parameters: dict) -> None:
    """
    Download results using the Python client.

    Credentials are requested from the "buscaCredenciasDownload" service,
    the remote file list is optionally filtered, the files are fetched with
    ``download_case_from_s3`` and any gzipped file is decompressed.

    :param parameters: Dictionary containing the download parameters. Reads
        "repositorioId", "diretorioDestino" and, optionally,
        "filtroDownloadPorMascara".
    :raises CloudError: If any credential parameter is missing from the
        service reply.
    """
    repository_id = parameters.get("repositorioId")
    download_filter = parameters.get("filtroDownloadPorMascara")
    output_path = parameters.get("diretorioDestino")

    self._logger.info("Obtaining credentials for download")
    credentials = self._make_soap_request(
        "buscaCredenciasDownload", additional_arguments=parameters
    )

    def _credential(param_name):
        # Text of the named <Parametro>, or None when it is absent or empty.
        element = credentials.find(f"./Parametro[@nome='{param_name}']")
        return None if element is None else element.text

    access = _credential("cloudAccess")
    secret = _credential("cloudSecret")
    session_token = _credential("cloudSessionToken")
    url = _credential("cloudUrl")
    bucket_name = _credential("diretorioBase")

    # Validate before touching any value, so a missing parameter surfaces as
    # CloudError rather than an AttributeError on None.
    required = (access, secret, session_token, url, bucket_name)
    if any(value is None for value in required):
        raise CloudError("Failed to retrieve credentials for downloading results.")

    bucket_name = bucket_name.replace("repository", "repository-download")

    file_list = self.list_download_files(repository_id)

    # filtering files to download
    if download_filter:
        # Anchored at both ends; compiled once for the whole listing.
        pattern = re.compile("^[a-zA-Z0-9./_]*(" + download_filter + ")$")
        filtered_file_list = [
            file["name"] for file in file_list if pattern.match(file["name"])
        ]
    else:
        filtered_file_list = [file["name"] for file in file_list]

    self._logger.info("Downloading results")
    downloaded_list = download_case_from_s3(
        repository_id=parameters["repositorioId"],
        cluster_name=self.cluster["name"],
        access=access,
        secret=secret,
        session_token=session_token,
        bucket_name=bucket_name,
        url=url,
        output_path=output_path,
        file_list=filtered_file_list,
    )

    # Decompress gzipped files
    for file in filtered_file_list:
        local_path = os.path.join(output_path, file)
        if self._is_file_gzipped(local_path):
            self._decompress_gzipped_file(local_path)

    # Check if all requested files were downloaded
    for file in filtered_file_list:
        if file not in downloaded_list:
            self._logger.warning(f"File {file} was not downloaded.")

    self._logger.info(f"Results downloaded to {output_path}")
1228
-
1229
def _is_file_gzipped(self, file_path: str) -> bool:
    """
    Checks if a file is gzipped by inspecting its magic number.

    :param file_path: The path to the file.
    :return: True if the first two bytes are the gzip magic number, False
        otherwise - including when the file cannot be read, in which case a
        warning is logged.
    """
    gzip_magic = b"\x1f\x8b"
    try:
        with open(file_path, "rb") as f_check:
            return f_check.read(2) == gzip_magic
    except OSError:
        # Report through the instance logger, like the rest of the class,
        # instead of printing to stdout.
        self._logger.warning(
            "Could not read %s to check for gzip magic number.", file_path
        )
        return False
1245
-
1246
def _decompress_gzipped_file(self, gzipped_file_path: str) -> str:
    """
    Decompresses a gzipped file.

    If the original filename ends with .gz, the .gz is removed for the
    decompressed filename. Otherwise, the file is decompressed in-place.
    The original gzipped file is removed upon successful decompression.

    :param gzipped_file_path: The path to the gzipped file.
    :return: The path to the decompressed file. If decompression fails,
        the original gzipped_file_path is returned.
    """
    decompressed_target_path = (
        gzipped_file_path[:-3]
        if gzipped_file_path.lower().endswith(".gz")
        else gzipped_file_path
    )
    # Use a temporary file for decompression to avoid data loss if issues occur
    temp_decompressed_path = decompressed_target_path + ".decompressing_tmp"

    try:
        with gzip.open(gzipped_file_path, "rb") as f_in, open(
            temp_decompressed_path, "wb"
        ) as f_out:
            shutil.copyfileobj(f_in, f_out)
        # Move the result into place BEFORE deleting the source: if the move
        # fails, the gzipped original is still on disk. os.replace also
        # overwrites an existing target on every platform, which covers both
        # the in-place case and a stale file left at the target path.
        os.replace(temp_decompressed_path, decompressed_target_path)
        if decompressed_target_path != gzipped_file_path:
            os.remove(gzipped_file_path)
        return decompressed_target_path
    except (gzip.BadGzipFile, EOFError, OSError) as e:
        self._logger.error(
            "Failed to decompress %s: %s. Original file kept.", gzipped_file_path, e
        )
    except Exception as e:  # Anything else raised while decompressing or moving files
        self._logger.error(
            "Error during post-decompression file operations for %s: %s. Original file kept.",
            gzipped_file_path,
            e,
        )
    finally:
        if os.path.exists(
            temp_decompressed_path
        ):  # Clean up temp file if it still exists
            os.remove(temp_decompressed_path)
    return gzipped_file_path  # Return original path if decompression failed
1290
-
1291
def get_programs(self) -> List[str]:
    """
    List the program names offered by the cloud that are also allowed.

    Names come from the ``nome`` attribute of each child of the versions
    XML; only those present in ``_ALLOWED_PROGRAMS`` are kept, in document
    order.
    """
    allowed_names = []
    for model in self._get_cloud_versions_xml():
        program_name = model.attrib["nome"]
        if program_name in _ALLOWED_PROGRAMS:
            allowed_names.append(program_name)
    return allowed_names
1295
-
1296
def get_program_versions(self, program: str) -> dict[int, str]:
    """
    Map version IDs to version names for a program.

    :param program: Program name; must be one of ``get_programs()``.
    :return: ``{id: versao}`` built from every ``<Versao>`` element found
        under the matching program node of the versions XML.
    :raises CloudInputError: If ``program`` is not a string or is not an
        available program.
    """
    if not isinstance(program, str):
        raise CloudInputError("Program must be a string")
    if program not in self.get_programs():
        available_programs = ", ".join(self.get_programs())
        raise CloudInputError(
            f"Program {program} not found. Available programs: {available_programs}"
        )

    return {
        int(version_node.attrib["id"]): version_node.attrib["versao"]
        for model in self._get_cloud_versions_xml()
        if model.attrib["nome"] == program
        for version_node in model.findall(".//Versao")
    }
1314
-
1315
def get_execution_types(
    self, program: str, version: Union[str, int]
) -> dict[int, str]:
    """
    Map execution-type IDs to names for one version of a program.

    :param program: Program name; must be one of ``get_programs()``.
    :param version: Either a version ID (int) or a version name (str), as
        returned by ``get_program_versions``.
    :return: ``{id: nome}`` of the execution types listed under the matching
        version node of the versions XML.
    :raises CloudInputError: If the program or the version is not available.
    """
    if not isinstance(program, str):
        raise CloudInputError("Program must be a string")
    if program not in self.get_programs():
        available_programs = ", ".join(self.get_programs())
        raise CloudInputError(
            f"Program {program} not found. Available programs: {available_programs}"
        )

    if isinstance(version, int):
        if version not in self.get_program_versions(program):
            available_ids = ", ".join(
                map(str, list(self.get_program_versions(program).keys()))
            )
            raise CloudInputError(
                f"Version id {version} of program {program} not found. Available version ids: {available_ids}"
            )
        # Translate the ID into the version name used inside the XML.
        version = self.get_program_versions(program)[version]
    elif version not in self.get_program_versions(program).values():
        available_versions = ", ".join(self.get_program_versions(program).values())
        raise CloudInputError(
            f"Version {version} of program {program} not found. Available versions: {available_versions}"
        )

    execution_types = {}
    for program_node in self._get_cloud_versions_xml():
        if program_node.attrib["nome"] != program:
            continue
        # Version nodes sit three levels below the program node; each holds
        # its execution types under its first child.
        for version_node in program_node[0][0][0]:
            if version_node.attrib["versao"] != version:
                continue
            for type_node in version_node[0]:
                type_id = int(type_node.attrib["id"])
                execution_types[type_id] = type_node.attrib["nome"]
    return execution_types
1345
-
1346
def get_memory_per_process_ratios(self) -> List[str]:
    """
    List the distinct memory-per-process ratios of the current cluster.

    Each ratio is the ``memoriaPorCore`` attribute of an instance type under
    the cluster's ``ColecaoInstanciaTipo`` elements, formatted as ``"<n>:1"``.

    :return: The distinct ratio strings in sorted (string) order.
    """
    unique_ratios = set()
    for cluster in self._get_cloud_clusters_xml():
        if cluster.attrib["nome"] != self.cluster["name"]:
            continue
        for collection in cluster:
            if collection.tag != "ColecaoInstanciaTipo":
                continue
            for instance_type in collection:
                unique_ratios.add(f"{instance_type.attrib['memoriaPorCore']}:1")
    return sorted(unique_ratios)
1360
-
1361
def get_repository_durations(self) -> dict[int, str]:
    """
    Map repository-duration IDs to their display labels.

    :return: Only the "Normal (1 month)" option when the current cluster is
        named "PSR-US"; all four options otherwise.
    """
    # self.cluster is used as a mapping throughout the class
    # (self.cluster["name"], self.cluster["url"]), so comparing the mapping
    # itself to "PSR-US" could never match. Compare the cluster name; a plain
    # string is still accepted for backward compatibility.
    cluster = self.cluster
    cluster_name = cluster["name"] if isinstance(cluster, dict) else cluster

    if cluster_name == "PSR-US":
        return {
            2: "Normal (1 month)",
        }

    return {
        1: "Short (1 week)",
        2: "Normal (1 month)",
        3: "Extended (6 months)",
        4: "Long (2 years)",
    }
1374
-
1375
def _get_instance_type_map(self) -> dict[int, tuple[str, bool]]:
    """
    Map instance-type IDs of the current cluster to ``(ratio, flag)`` pairs.

    ``ratio`` is ``"<memoriaPorCore>:1"`` and ``flag`` tells whether the
    instance type's ``descricao`` contains "Price Optimized". The map is
    built once and kept in ``self._instance_type_map``.
    """
    if self._instance_type_map is None:
        instance_types = {}
        for cluster in self._get_cloud_clusters_xml():
            if cluster.attrib["nome"] != self.cluster["name"]:
                continue
            for collection in cluster:
                if collection.tag != "ColecaoInstanciaTipo":
                    continue
                for instance_type in collection:
                    type_id = int(instance_type.attrib["id"])
                    ratio = f'{instance_type.attrib["memoriaPorCore"]}:1'
                    is_price_optimized = (
                        "Price Optimized" in instance_type.attrib["descricao"]
                    )
                    instance_types[type_id] = (ratio, is_price_optimized)
        self._instance_type_map = instance_types
    return self._instance_type_map
1391
-
1392
- @staticmethod
1393
- def _validate_extensions(extensions: List[str]):
1394
- for ext in extensions:
1395
- if not ext.isalnum():
1396
- raise CloudInputError(
1397
- f"Invalid extension '{ext}' detected. Extensions must be alphanumeric."
1398
- )
1399
-
1400
-
1401
def _budget_matches_list(budget_part: str, all_budgets: List[str]) -> List[str]:
    """Return the budgets in all_budgets whose name contains budget_part, ignoring case."""
    needle = budget_part.lower()
    return list(filter(lambda budget: needle in budget.lower(), all_budgets))
1405
-
1406
-
1407
def _filter_upload_files(directory: str, upload_filter: str) -> List[str]:
    """
    Select the entries of a directory whose names match the upload filter.

    :param directory: Directory whose direct entries are examined.
    :param upload_filter: Regular expression matched against the start of each
        entry name.
    :return: Matching entries, each joined with ``directory``.
    :raises CloudInputError: If ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        raise CloudInputError(f"Directory {directory} does not exist")

    pattern = re.compile(upload_filter)
    return [
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if pattern.match(entry)
    ]
1423
-
1424
-
1425
def replace_case_str_values(client: Client, case: Case) -> Case:
    """
    Replace string-valued fields of a case with their internal integer IDs.

    ``case.program_version`` and ``case.execution_type`` are each looked up
    through ``client`` when they hold a string. The given ``case`` object is
    updated in place and the same object is returned.
    """

    def _id_for(id_to_label, label):
        # First ID whose label equals `label`; `next` raises StopIteration
        # when no label matches.
        return next(key for key, value in id_to_label.items() if value == label)

    # Model Version
    if isinstance(case.program_version, str):
        known_versions = client.get_program_versions(case.program)
        case.program_version = _id_for(known_versions, case.program_version)

    # Execution Type (looked up against the already-resolved program version)
    if isinstance(case.execution_type, str):
        known_types = client.get_execution_types(case.program, case.program_version)
        case.execution_type = _id_for(known_types, case.execution_type)

    return case