patme 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of patme might be problematic. Click here for more details.

Files changed (46) hide show
  1. patme/__init__.py +52 -0
  2. patme/buildtools/__init__.py +7 -0
  3. patme/buildtools/rce_releasecreator.py +336 -0
  4. patme/buildtools/release.py +26 -0
  5. patme/femtools/__init__.py +5 -0
  6. patme/femtools/abqmsgfilechecker.py +137 -0
  7. patme/femtools/fecall.py +1092 -0
  8. patme/geometry/__init__.py +0 -0
  9. patme/geometry/area.py +124 -0
  10. patme/geometry/coordinatesystem.py +635 -0
  11. patme/geometry/intersect.py +284 -0
  12. patme/geometry/line.py +183 -0
  13. patme/geometry/misc.py +420 -0
  14. patme/geometry/plane.py +464 -0
  15. patme/geometry/rotate.py +244 -0
  16. patme/geometry/scale.py +152 -0
  17. patme/geometry/shape2d.py +50 -0
  18. patme/geometry/transformations.py +1831 -0
  19. patme/geometry/translate.py +139 -0
  20. patme/mechanics/__init__.py +4 -0
  21. patme/mechanics/loads.py +435 -0
  22. patme/mechanics/material.py +1260 -0
  23. patme/service/__init__.py +7 -0
  24. patme/service/decorators.py +85 -0
  25. patme/service/duration.py +96 -0
  26. patme/service/exceptionhook.py +104 -0
  27. patme/service/exceptions.py +36 -0
  28. patme/service/io/__init__.py +3 -0
  29. patme/service/io/basewriter.py +122 -0
  30. patme/service/logger.py +375 -0
  31. patme/service/mathutils.py +108 -0
  32. patme/service/misc.py +71 -0
  33. patme/service/moveimports.py +217 -0
  34. patme/service/stringutils.py +419 -0
  35. patme/service/systemutils.py +290 -0
  36. patme/sshtools/__init__.py +3 -0
  37. patme/sshtools/cara.py +435 -0
  38. patme/sshtools/clustercaller.py +420 -0
  39. patme/sshtools/facluster.py +350 -0
  40. patme/sshtools/sshcall.py +168 -0
  41. patme-0.4.4.dist-info/LICENSE +21 -0
  42. patme-0.4.4.dist-info/LICENSES/MIT.txt +9 -0
  43. patme-0.4.4.dist-info/METADATA +168 -0
  44. patme-0.4.4.dist-info/RECORD +46 -0
  45. patme-0.4.4.dist-info/WHEEL +4 -0
  46. patme-0.4.4.dist-info/entry_points.txt +3 -0
patme/sshtools/cara.py ADDED
@@ -0,0 +1,435 @@
1
+ # SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR)
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """
6
+ Connect via python to the FA cluster using ssh, submit commands, move inputs/outputs to and from the cluster.
7
+
8
+ **Scenario**
9
+
10
+ To connect to the institute cluster and submit a job, two things need to be done.
11
+ First the input files must be copied to the cluster.
12
+
13
+ ``\\\\cluster.fa.bs.dlr.de\\<username>\\``
14
+
15
+ Secondly, when all required files are available on the cluster,
16
+ the cluster command (see cluster documentation) needs to be sent using a secure connection.
17
+ This is done via ssh using the rsa-public/private-key algorithm.
18
+
19
+ **Connect for the first time**
20
+
21
+ - Create a public+private key using ssh-keygen
22
+ - Put the private key to the location specified in patme.sshtools.sshcall.privateKeyFileConfig
23
+ or adapt this variable at runtime.
24
+ - Append the public key to "~/.ssh/authorized_keys" on the remote computer
25
+
26
+ **How to connect to a host**
27
+
28
+ >> sshCluster('echo hello world')
29
+ 'hello world\\n'
30
+
31
+ """
32
+ import os
33
+ import re
34
+ from time import sleep, time
35
+
36
+ from patme.service.exceptions import DelisSshError
37
+ from patme.service.logger import log
38
+ from patme.sshtools import sshcall
39
+
40
+
41
def get_default_slurm_args():
    """Return a dict with the default SLURM submission options.

    The partition may be overridden through the ``CARA_PARTITION`` environment
    variable; every other option is a fixed default.
    """
    defaults = {
        "nodes": "1",
        "hint": "nomultithread",
        "time": "02:00:00",
        "account": "2263032",
        "no-kill": "",
        "output": "cluster.r%j.log",
    }
    defaults["partition"] = os.environ.get("CARA_PARTITION", "ppp")
    return defaults
51
+
52
+
53
def _getClusterAuthentication():
    """Returns the hostname, host key string and private key file information for a cluster call.

    A description to these objects can be found in service.sshremotecall.callSSH"""

    hostname = "cara.dlr.de"
    # this is the key from the file "~/.ssh/known_hosts" used by openssh on a linux machine
    hostKeyString = "AAAAB3NzaC1yc2EAAAADAQABAAACAQDL9y9u3D+refVuZnJJNdVeMK53EG0hfGUwuA+JyT2zOs6xOnhXhbTB0hOpORv4sd9V3mHJDf1yyIlZ/bgJCT4Znazz3amqzD7SmqGeNR8r7Z4whQY0drMpL67fthFNsqoUdjsOn+FZfWsZhy2ntMLIi4KRZ9Kaoe8Kqo3j1gej0iwq6W2+LYB69zhP1SHtT+603Qw97kAgrQeA2R71BFwUXSRzgDbPlucX8he9S4WjWZ3OTpfXksQtIN/8jGAsTw6x/4iu1ia8bjW5jc4q5qrF4UPdsRlbuByn2/QBU4XHZUcq6rZqv6KGyNqja2sZHsT7weDHo5JtYMNUzVB75SfmMigIxy3hcD6xicc5gSLQuw7e1BZsC8ld9Ku5hkL9OdXl/jkble55dO9lEKgze+y0QscBAYJKgi0FpQSMxw9SNt1IdImosIWfTT3jY3halybgWKvx85LVM86q45bk0RSSjgh1Oup87UO3GqF72zA+PX36v32WqMKoQ6ssqKjXOwSsXC1Ytf4GU7utoUXsqqFZOM/6CZp/09yPTTkkZGGsy2iUOw/1bS3uQcZi+lIpWqtEbsHYjrEOIPxofz4gl2Fo8yfQoUhKmED4XwWMnw0jwxNHy2uwBQz0ysIT4tz1ekBUh4fgO+2xhX/g6O24sLsfAGzc/I1gIUpmMaGJOQiwuw=="

    sshDir = os.path.join(os.path.expanduser("~"), ".ssh")
    privateKey = os.path.join(sshDir, "id_rsa")

    # Optionally pick up a dedicated identity file from ~/.ssh/config; sshconf is an
    # optional dependency, so a missing package silently falls back to the default key.
    try:
        from sshconf import read_ssh_config

        cfgfile = os.path.join(sshDir, "config")
        if os.path.exists(cfgfile):
            sshConfig = read_ssh_config(cfgfile)
            for hostAlias in sshConfig.hosts():
                hostEntry = sshConfig.host(hostAlias)
                if "cara.dlr.de" in hostEntry["hostname"]:
                    privateKey = hostEntry["identityfile"]
                    break
    except ImportError:
        pass

    if not os.path.exists(privateKey):
        log.warn(
            f"Private key file '{privateKey}' not found! "
            "Ensure that the ssh-agent has a valid private key stored"
        )
        privateKey = None

    return hostname, hostKeyString, privateKey
86
+
87
+
88
def sshClusterJob(
    remoteCommand, printOutput=True, checkInterval=5, time2WaitForJob=30, monitorNodeUsage=False, **kwargs
):
    """Submit a job to the institute cluster via ssh and return when terminated.

    After job submission, a connection to the cluster is established every 'checkInterval' seconds
    to check if the job's status is already set to 'COMPLETED'.

    :param remoteCommand: String with command for cluster. The arguments for the queuing
                          system must not contain the option '-i' to wait for job
                          completion.
    :param printOutput: True (default) will print output created by the ssh call.
    :param checkInterval: Time period in seconds between job completion checks
    :param time2WaitForJob: After job submission it might take some time for the cluster to
                            add the job to the queue. Enter max seconds [int] to wait.
    :param monitorNodeUsage: If True, warn once when the executing node shows a critical
                             RAM/HDD utilization.

    :return: int, job id
    :raise DelisSshError: if the submission failed or no job id could be parsed
    """
    with log.switchLevelTemp(log.WARN):
        retVal = sshCluster(remoteCommand, printOutput=printOutput, **kwargs)

    # a successful sbatch submission is acknowledged with "Submitted batch job <id>"
    if not retVal or "Submitted batch job" not in retVal:
        msg = "Job submission to cluster failed or maybe the arguments "
        msg += "for the cluster contained the option -i\n"
        msg += f"remote command: {remoteCommand}\nreturn value: {retVal}"
        raise DelisSshError(msg)

    try:
        jobId = int(retVal.split()[-1])
    except (ValueError, IndexError) as exc:  # was a bare "except:", which hid real errors
        msg = "Could not extract job id for cluster job submission.\n"
        msg += f"remote command: {remoteCommand}\nreturn value: {retVal}"
        raise DelisSshError(msg) from exc

    log.info(f"Job enqueued. JobId: {jobId}")
    usageWarningDone = False
    jobStatus = "ENQUEUED"
    while True:
        with log.switchLevelTemp(log.WARN):
            (jobEnded, retStatus) = clusterJobEnded(jobId, time2WaitForJob, **kwargs)

        if jobEnded:
            break

        # only log status transitions, not every poll
        if retStatus != jobStatus:
            log.info(f"Job status: {retStatus}")
            jobStatus = retStatus

        if monitorNodeUsage:
            if not usageWarningDone:
                nodeName = getNodeOfJob(jobId)
                usageWarningDone = printNodeUtilization(nodeName, usageWarningDone)

        sleep(checkInterval)

    return jobId
144
+
145
+
146
def sshCluster(remoteCommand, printOutput=True, **kwargs):
    """Submit a job to the institute cluster via ssh.

    This call does not wait for the completion of the cluster command.
    Please use sshClusterJob instead if you need to wait.

    :param remoteCommand: String with command for cluster
    :param printOutput: True (default) will print output created by the ssh call.
    """
    hostname, hostKeyString, privateKeyFile = _getClusterAuthentication()
    return sshcall.callSSH(
        hostname,
        remoteCommand,
        privateKeyFile,
        username=kwargs.get("username", None),
        hostKeyString=hostKeyString,
        printOutput=printOutput,
    )
165
+
166
+
167
def clusterJobEnded(jobId, time2WaitForJob=30, printOutput=False, **kwargs):
    """Check whether the cluster job with the given id has terminated.

    :param jobId: Id of job running on the cluster <int>
    :param time2WaitForJob: After job submission it might take some time for the cluster to
                            add the job to the queue. Enter max seconds [int] to wait.
    :param printOutput: Flag if the ssh output should be printed. Defaults to False
    :return: tuple (jobEnded, status) - jobEnded is True if the job terminated
             (completed, failed, cancelled or out of memory), status is the raw
             scheduler state string
    :raise DelisSshError: if no status could be obtained or the status is unknown
    """
    status = clusterJobStatus(jobId, printOutput=printOutput, **kwargs)
    if time2WaitForJob and not status:
        # the job may not yet be visible in the accounting - poll until timeout
        startTime = time()
        while not status:
            status = clusterJobStatus(jobId, printOutput=printOutput, **kwargs)
            if time() - startTime > time2WaitForJob:
                raise DelisSshError(f"Could not obtain status of cluster job with id {jobId}")

    if not status:
        raise DelisSshError(f"Job with id {jobId} not found in cluster job history.")

    if status == "PENDING":
        log.debug("Job execution on cluster is waiting for resources.")
        return (False, status)

    if status in ("RESIZING", "RUNNING", "REQUEUED"):
        return (False, status)

    if status == "FAILED":
        log.debug(f"Job with id {jobId} failed")
        return (True, status)

    if status == "CANCELLED":
        log.debug(f"Job with id {jobId} was cancelled.")
        return (True, status)

    if status == "COMPLETED":
        return (True, status)

    # substring match: sacct truncates long states such as "OUT_OF_MEMORY"
    if "OUT_OF_ME" in status:
        log.debug(f"Job with id {jobId} failed due to too less memory")
        return (True, status)

    raise DelisSshError(f'Unknown cluster status: "{status}"')
212
+
213
+
214
def clusterJobStatus(jobId, printOutput=False, **kwargs):
    """Query the scheduler state of the cluster job with id "jobId".

    :param jobId: id of cluster process (int)
    :return: scheduler status string (e.g. "PENDING", "RUNNING", "COMPLETED").
             A job that already left the queue while the accounting still reports
             RUNNING/PENDING is reported as "COMPLETED".
    """
    queueListing = sshCluster("squeue", printOutput=printOutput, **kwargs)
    jobFinished = re.search(rf"\s+{jobId} ", queueListing) is None
    accounting = sshCluster(f"sacct -o state -n -j {jobId}", printOutput=printOutput, **kwargs)
    # first sacct line holds the state; "+" marks truncated columns and is stripped
    status = accounting.split("\n")[0].replace("+", "").strip()
    if jobFinished and status in ["RUNNING", "PENDING"]:
        # sacct lags behind squeue: the job left the queue, so it is done
        return "COMPLETED"
    return status
228
+
229
+
230
def copyClusterFilesSCP(files, srcBaseDir=".", destBaseDir=".", mode="put", keytype="ssh-rsa", port=None, **kwargs):
    """Copy files to or from the cluster via SCP.

    Authentication data is obtained from _getClusterAuthentication; the actual
    transfer is delegated to sshcall.copyFilesSCP with logging raised to WARN level.

    :param files: files to transfer (presumably an iterable of paths relative to the
                  base directories - TODO confirm against sshcall.copyFilesSCP)
    :param srcBaseDir: base directory of the source files. Defaults to "."
    :param destBaseDir: base directory at the destination. Defaults to "."
    :param mode: transfer direction handed to sshcall.copyFilesSCP; "put" uploads,
                 presumably "get" downloads - verify against sshcall.copyFilesSCP
    :param keytype: ssh key type string handed to sshcall.copyFilesSCP
    :param port: optional port for the scp connection (None uses the default)
    :param kwargs: may contain "username" to override the default remote user
    """
    hostname, bsfalxclusterKeyString, privateKeyFile = _getClusterAuthentication()
    username = kwargs.pop("username", None)
    with log.switchLevelTemp(log.WARN):
        sshcall.copyFilesSCP(
            files,
            hostname,
            privateKeyFile,
            username,
            srcBaseDir,
            destBaseDir,
            bsfalxclusterKeyString,
            mode,
            keytype,
            port,
        )
247
+
248
+
249
def _wrapSshCluster(*args, **kwargs):
    """Call sshCluster, retrying up to three times on failure.

    This method wraps the sshCluster routine to prevent python cyclic imports.

    :return: output of the successful sshCluster call
    :raise Exception: re-raises the last error if all retries failed
    """
    retries = 3
    for retry in range(retries):
        try:
            return sshCluster(*args, **kwargs)
        except Exception as e:
            # Bug fixes: "time" is the function imported via "from time import sleep, time",
            # so the original "time.sleep(60)" raised AttributeError. Also "retry < retries"
            # was always true inside range(retries), so the re-raise was unreachable and
            # "result" could be referenced unbound after an exhausted loop.
            if retry < retries - 1:
                log.error(f"Got an error while calling the cluster (retry in 60s): {e}")
                sleep(60)
            else:
                raise
263
+
264
+
265
def numberOfClusterJobsAvailable(exclusiveNode=False):
    """Checks and returns the number of available jobs on the FA cluster.

    :param exclusiveNode: if True, number of cluster jobs is given, that can allocate
                          a complete node. The default is False

    :returns: Returns the number of jobs that can be executed on the cluster.
    """
    clusterCommand = 'sinfo -h -o "%t %N";'
    clusterCommand += 'squeue -h -t RUNNING,COMPLETING -o "%N"'
    lines = _wrapSshCluster(clusterCommand, printOutput=False).split("\n")

    # sinfo part (no header line in the output):
    #   mix node[1,3]    <- these nodes have one or more active jobs
    #   alloc node5      <- these nodes are exclusively used
    #   idle node[2,4,6] <- these nodes are awaiting jobs (up to 2)
    # squeue part (no header line either), one line per running job:
    #   node5
    #   node1
    #   node3
    #   node3

    def nodesInState(state):
        # second column minus the "node" prefix and surrounding brackets, e.g. "1,3"
        return _splitNodes([line.split()[1][4:].strip("[]") for line in lines if state in line])

    mixNodes = nodesInState("mix")
    idleNodes = nodesInState("idle")
    activeJobNodes = [int(line[4:].strip()) for line in lines if line.startswith("node")]

    if exclusiveNode:
        return len(idleNodes)

    # each node runs at most two jobs: mixed nodes with a free slot plus two per idle node
    availableJobs = 2 * len(idleNodes)
    for mixNode in mixNodes:
        if activeJobNodes.count(mixNode) < 2:
            availableJobs += 1
    return availableJobs
300
+
301
+
302
def _splitNodes(nodes):
    """Parse the nodes strings and return a sorted list of unique node numbers.

    Each entry may contain comma separated numbers and inclusive "a-b" ranges.

    Example:

    >>> inputString = ['1,4', '2-3,5-6']
    >>> _splitNodes(inputString)
    [1, 2, 3, 4, 5, 6]

    :param nodes: iterable of node specification strings
    :return: sorted list of unique node numbers. The original "list(set(...))"
             relied on CPython's hash ordering of small ints and did not guarantee
             the sorted result the doctest promises - hence sorted() here.
    """
    nodeNumbers = set()
    for nodesString in nodes:
        for group in nodesString.split(","):
            bounds = group.split("-")
            if len(bounds) > 1:
                nodeNumbers.update(range(int(bounds[0]), int(bounds[1]) + 1))
            else:
                nodeNumbers.add(int(bounds[0]))
    return sorted(nodeNumbers)
321
+
322
+
323
def numberOfIdleClusterNodes():
    """Return the number of idle cluster nodes.

    The cluster call returns "Allocated/Idle", e.g. "3/3".

    Attention: This is not the number of possible cluster jobs, since 2 jobs can be run
    at each node. If zero nodes are idle, there may be still the opportunity to start
    a job right away.
    """
    allocatedAndIdle = _wrapSshCluster('sinfo -h -e -o "%A"', printOutput=False)
    idlePart = allocatedAndIdle.split("/")[-1]
    return int(idlePart)
334
+
335
+
336
def getNodeUtilization(nodeName="head"):
    """Returns the utilization of the cluster head (default) or of one of its nodes.
    The information is retrieved using the commands vmstat and df. The keys of the
    returned dictionary are described in the following.

    Processes
        r: The number of processes waiting for run time.
        b: The number of processes in uninterruptible sleep.
    RAM Memory
        swpd: The amount of virtual memory used. (in MB)
        free: The amount of idle memory. (in MB)
        buff: The amount of memory used as buffers. (in MB)
        cache: The amount of memory used as cache. (in MB)
    Swap Memory
        si: Amount of memory swapped in from disk (in MB/s).
        so: Amount of memory swapped to disk (in MB/s).
    IO
        bi: Blocks received from a block device (blocks/s).
        bo: Blocks sent to a block device (blocks/s).
    System
        in: The number of interrupts per second, including the clock.
        cs: The number of context switches per second.
    CPU
        These are percentages of total CPU time.
        us: Time spent running non-kernel code. (user time, including nice time)
        sy: Time spent running kernel code. (system time)
        id: Time spent idle. Prior to Linux 2.5.41, this includes IO-wait time.
        wa: Time spent waiting for IO. Prior to Linux 2.5.41, shown as zero.
    HDD Memory
        1K-blocks: Total size of storage memory (in KB)
        Used: Total size of used storage memory (in KB)
        Available: Total size of available storage memory (in KB)
        Use%: Relative usage of storage memory (in %)

    :param nodeName: Name of the node (node1, ...) of which the information is to
                     be retrieved. The default is "head".
    :return: Dictionary with utilization information.
    """
    nodeCmdString = ""
    filesystem = "/home"
    if nodeName != "head":
        # non-head nodes are reached by an extra ssh hop from the head node,
        # and their root filesystem is reported under /dev/sda3 instead of /home
        nodeCmdString = f"ssh {nodeName} "
        filesystem = "/dev/sda3"
    # run both commands in one remote call; vmstat -S M reports in megabytes, df -k in KB
    remoteCmd = f"{nodeCmdString}vmstat -S M;"
    remoteCmd += f"{nodeCmdString}df -l -k"
    remoteCmdOutput = _wrapSshCluster(remoteCmd, printOutput=False).split("\n")
    # vmstat output: line 0 is the category banner, line 1 the column names,
    # line 2 the numeric values - zip names and values into a dict
    vmstatDict = dict(zip(remoteCmdOutput[1].split(), [float(item) for item in remoteCmdOutput[2].split()]))
    # df output: pick the row of the relevant filesystem ...
    dfData = [row for row in remoteCmdOutput if row.startswith(filesystem)][0]
    # ... and zip it with the df header (line 3 of the combined output); the first
    # column (Filesystem) and last (Mounted on) are dropped, "%" signs stripped
    dfDict = dict(zip(remoteCmdOutput[3].split()[1:-1], [float(item.strip("%")) for item in dfData.split()[1:-1]]))
    vmstatDict.update(dfDict)
    return vmstatDict
387
+
388
+
389
def printNodeUtilization(nodeName, printOnCriticalUtilization=False):
    """Prints the utilization of a cluster node.

    Bug fix: the function previously took a spurious "self" first parameter although
    it is a module-level function - the caller in sshClusterJob passes only
    (nodeName, flag), so the arguments were misbound. The error message below also
    interpolated an empty node name.

    :param nodeName: name of the cluster node to inspect
    :param printOnCriticalUtilization: Flag if only on a critical utilization, the routine should print anything
    :return: Flag if a usage warning was emit
    :raise DelisSshError: if nodeName could not be found
    """
    logMethod = log.warn if printOnCriticalUtilization else log.info
    usageWarningDone = False
    if not nodeName:
        raise DelisSshError(
            f'Utilization of the used cluster node {nodeName} cannot be performed: Node "{nodeName}" not found.'
        )
    utilizationInfo = getNodeUtilization(nodeName=nodeName)
    # vmstat reports MB ("-S M"); cached memory counts as reclaimable -> free RAM in GB
    freeRam = (utilizationInfo["free"] + utilizationInfo["cache"]) / 1024
    # df -k reports KB -> available disk space in GB
    freeHdd = utilizationInfo["Available"] / 1024 / 1024
    if freeHdd < 2 or not printOnCriticalUtilization:
        logMethod(f"HDD memory utilization of node {nodeName} critical. This may cause problems.")
        usageWarningDone = True
    if freeRam < 2 or not printOnCriticalUtilization:
        logMethod(f"RAM memory utilization of node {nodeName} critical. This may cause problems.")
        usageWarningDone = True
    return usageWarningDone
415
+
416
+
417
def getNodeOfJob(jobId):
    """Returns the name of the node on which the job with id "jobId" is being executed on.

    :param jobId: Id of cluster process (int)
    :return: Name of node ("node1", "node2", ...) or None if jobId is not found.
    """
    node = None
    try:
        # column 8 of the default squeue output is the node list of the job
        node = _wrapSshCluster(f"squeue | grep {jobId}", printOutput=False).split()[7]
    except Exception:  # was a bare "except:"; don't swallow SystemExit/KeyboardInterrupt
        log.warning("Node not found, because jobID not found in cluster queue")
    return node
429
+
430
+
431
if __name__ == "__main__":
    # Ad-hoc smoke test: disable the configured private key file and run a
    # trivial remote command against the cluster.
    # NOTE(review): sshcall is already imported at module level; this re-import
    # is redundant but harmless.
    from patme.sshtools import sshcall

    sshcall.privateKeyFileConfig = None
    sshCluster("echo foobar")