eos-utils 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eos-utils/__init__.py ADDED
@@ -0,0 +1,2 @@
1
# Public API of the package.  Explicit relative imports are required on
# Python 3: a bare `from get_voms import ...` looks for a top-level module
# named `get_voms` and fails once this package is imported from elsewhere.
from .get_voms import get_voms
from .copy_eos import copy_eos
eos-utils/copy_eos.py ADDED
@@ -0,0 +1,165 @@
1
+ import os
2
+ import subprocess
3
+
4
+ from .get_voms import get_voms
5
+
6
+ ################################
7
+
8
+
9
def find_nth(string: str, sub: str, n: int):
    """
    Return the start index of the n-th (1-based, non-overlapping) appearance of `sub` in `string`.

    Arguments:
        - string: <str> Text to search in.
        - sub: <str> Substring to search for.
        - n: <int> Which appearance to locate (1 = first).

    Returns the index in `string` at which the n-th appearance starts, or -1
    (after printing a warning) if `string` contains fewer than n appearances.
    -1 is also returned when n < 1, since no such appearance exists.
    """
    index = -1
    search_from = 0
    for i in range(n):
        # Search the original string from just past the previous match, so the
        # returned index is always relative to the start of `string`.
        index = string.find(sub, search_from)
        if index < 0:
            print(f"WARNING: Requested {n} appearance of {sub}, but search broke on {i+1} appearance. Returning -1")
            return -1
        search_from = index + len(sub)
    return index
18
+
19
def _split_redirector(filepath: str):
    """
    Split `filepath` into `(redirector, directory)`.

    The redirector is everything up to and including the first slash of the
    second "//" (e.g. "root://host/"); it is "" when `filepath` holds fewer
    than two "//".  The directory part always ends with a path separator so
    that relative file names can be appended by plain concatenation.
    """
    second_double_slash = find_nth(filepath, "//", 2)
    if second_double_slash < 0:
        return "", os.path.join(filepath, "")
    cut = second_double_slash + 1
    return filepath[:cut], os.path.join(filepath[cut:], "")


def _run_listing_command(cmd, stdin_text=None, discard_output=False):
    """
    Run `cmd` (an argument list, no shell) and return `(exit_status, stdout_text)`.

    With `discard_output` both stdout and stderr are thrown away and only the
    exit status is meaningful.  A command that cannot be started at all is
    reported and treated as a failure (status 127, empty output).
    """
    try:
        finished = subprocess.run(
            cmd,
            input=stdin_text,
            stdout=subprocess.DEVNULL if discard_output else subprocess.PIPE,
            stderr=subprocess.DEVNULL if discard_output else None,
            universal_newlines=True,
        )
    except OSError as err:
        print(f"ERROR: Unable to run {cmd[0]}: {err}")
        return 127, ""
    return finished.returncode, finished.stdout or ""


def _list_origin_entries(origin_redirector: str, origin_filepath: str):
    """Return the full paths found (recursively) below the origin directory."""
    if origin_redirector != "":
        _, listing = _run_listing_command(["xrdfs", origin_redirector, "ls", "-R", origin_filepath])
        return listing.splitlines()

    # Local origin: walk the tree directly.  `ls -R` prints bare file names
    # under "dir:" headers, which cannot be mapped back to full paths.
    entries = []
    for dirpath, dirnames, filenames in os.walk(origin_filepath):
        dirnames.sort()  # deterministic traversal order
        entries.extend(os.path.join(dirpath, name) for name in sorted(filenames))
    return entries


def _exists_at_destination(destination_redirector: str, path: str):
    """Tell whether `path` already exists at the destination."""
    if destination_redirector != "":
        status, _ = _run_listing_command(["xrdfs", destination_redirector, "ls", path], discard_output=True)
        return status == 0
    return os.path.exists(path)


def copy_eos(
    origin_filepath: str, destination_filepath: str,
    grep_str: str="...", filetype_str: str="...", force: bool=False,
    condor: bool=False, output_dir: str=os.path.join(os.getcwd(), ".condor_copy_eos", ""), queue: str="longlunch", memory: str="4GB"
):
    """
    A function that facilitates the transferring of (potentially large and many) files from one location to another, with at least one location being part of the CERN EOS filesystem.

    Required arguments:
        - origin_filepath: <str> Full filepath (including redirector if applicable) for location of directory to transfer from.
        - destination_filepath: <str> Full filepath (including redirector if applicable) for location of directory to transfer to.
    Optional arguments:
        - grep_str: <str> Pattern with which to select (using grep) which files to transfer. Only files whose filepath at the origin matches will be transferred. "..." disables the selection.
        - filetype_str: <str> Filetype of files to transfer, format as ".<filetype>". Only files whose name ends with this string will be transferred. "..." keeps every entry whose name contains a ".".
        - force: <bool> Enables forcing the xrdcp (overwriting files if already located at the destination).
    Condor arguments:
        - condor: <bool> Enables transfer with condor.
        - output_dir: <str> Directory for condor files to be dumped into. The default is computed from the working directory at import time.
        - queue: <str> Queue with which to submit the condor job.
        - memory: <str> RAM with which to submit the condor job.

    Returns 1 when neither path carries a redirector or when there is nothing
    to transfer; returns None once the copies were run / the jobs submitted.
    """
    # Function-scope imports: only this function needs them.
    import shlex
    import time

    origin_redirector, origin_filepath = _split_redirector(origin_filepath)
    destination_redirector, destination_filepath = _split_redirector(destination_filepath)

    if origin_redirector == "" and destination_redirector == "":
        print("ERROR: Both the source and target paths don't have redirectors, wither this is entirely a local copy and should use `cp` or `eoscp`, or you forgot to input the reirectors. Exiting now.")
        return 1

    # List everything below the origin, kept in memory so no scratch file is
    # left behind (or clobbered) in the working directory.
    listing = _list_origin_entries(origin_redirector, origin_filepath)
    if grep_str != "...":
        # Keep grep's pattern semantics, but hand the pattern over as a single
        # argument instead of splicing it into a shell command line.
        _, matched = _run_listing_command(
            ["grep", "-e", grep_str],
            stdin_text="".join(f"{entry}\n" for entry in listing),
        )
        listing = matched.splitlines()

    # Skimming output and keeping only real files (that have the right filetype, if given)
    files_to_copy = []
    for entry in listing:
        entry = entry.rstrip()
        end_of_filepath = entry.split("/")[-1]
        if filetype_str != "...":
            wanted = end_of_filepath.endswith(filetype_str)
        else:
            wanted = "." in end_of_filepath
        if not wanted:
            continue
        start = entry.find(origin_filepath)
        if start < 0:
            # Cannot derive a path relative to the origin directory.
            print(f"WARNING: Skipping {entry}, it is not located under {origin_filepath}")
            continue
        files_to_copy.append(entry[start + len(origin_filepath):])

    # Remove already transferred files if not forcing
    if not force:
        files_to_copy = [
            file_to_copy for file_to_copy in files_to_copy
            if not _exists_at_destination(destination_redirector, destination_filepath + file_to_copy)
        ]
    if not files_to_copy:
        return 1

    # Get proxy information (required in executable script for this method of running)
    proxy = get_voms()

    xrdcp_flags = ["-f"] if force else []

    if not condor:
        for file_to_copy in files_to_copy:
            subprocess.call(
                ["xrdcp"] + xrdcp_flags
                + [
                    origin_redirector + origin_filepath + file_to_copy,
                    destination_redirector + destination_filepath + file_to_copy,
                ]
            )
        return

    # The job directory is only needed (and only created) for condor transfers.
    jobs_dir = os.path.join(output_dir, time.strftime("%Y%m%d_%H%M%S"), "")
    os.makedirs(jobs_dir, exist_ok=True)

    # Setup the filepaths for the input and output files
    base_name = "copy_eos"
    job_file_executable = os.path.join(jobs_dir, f"{base_name}.sh")
    job_file_submit = os.path.join(jobs_dir, f"{base_name}.sub")
    job_file_out = os.path.join(jobs_dir, f"{base_name}.$(ClusterId).$(ProcId).out")
    job_file_err = os.path.join(jobs_dir, f"{base_name}.$(ClusterId).$(ProcId).err")
    job_file_log = os.path.join(jobs_dir, f"{base_name}.$(ClusterId).log")
    n_jobs = len(files_to_copy)

    # Write the executable file: one script for all jobs, each job copies the
    # file whose index equals its ProcId (passed as $1).
    remote_proxy = "/srv" + proxy[proxy.rfind("/"):]
    script_lines = [
        "#!/bin/bash\n",
        f"export X509_USER_PROXY={remote_proxy}\n",
        "echo \"Start of job $1\"\n",
        "echo \"-------------------------------------\"\n",
    ]
    xrdcp_prefix = " ".join(["xrdcp"] + xrdcp_flags)
    for i, file_to_copy in enumerate(files_to_copy):
        source = shlex.quote(origin_redirector + origin_filepath + file_to_copy)
        target = shlex.quote(destination_redirector + destination_filepath + file_to_copy)
        script_lines.append(f"if [ $1 -eq {i} ]; then\n")
        script_lines.append(f"    {xrdcp_prefix} {source} {target}\n")
        script_lines.append("fi\n")
    script_lines.append("echo \"Finished job $1\"\n")
    script_lines.append("echo \"-------------------------------------\"\n")
    with open(job_file_executable, "w") as executable_file:
        executable_file.writelines(script_lines)
    os.chmod(job_file_executable, 0o775)

    # Write the submit file
    with open(job_file_submit, "w") as submit_file:
        submit_file.writelines([
            f"executable = {job_file_executable}\n",
            "arguments = $(ProcId)\n",
            f"output = {job_file_out}\n",
            f"error = {job_file_err}\n",
            f"log = {job_file_log}\n",
            f"request_memory = {memory}\n",
            "getenv = True\n",
            f'+JobFlavour = "{queue}"\n',
            "should_transfer_files = YES\n",
            f"Transfer_Input_Files = {proxy}\n",
            "Transfer_Output_Files = \"\"\n",
            "when_to_transfer_output = ON_EXIT\n",
            "on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)\n",
            "max_retries = 0\n",
            f"queue {n_jobs}\n",
        ])

    # Submit the condor jobs
    submit_cmd = ["condor_submit"]
    if os.getcwd().startswith("/eos"):
        # see https://batchdocs.web.cern.ch/troubleshooting/eos.html#no-eos-submission-allowed
        submit_cmd.append("-spool")
    subprocess.run(submit_cmd + [job_file_submit])
eos-utils/get_voms.py ADDED
@@ -0,0 +1,30 @@
1
+ import subprocess
2
+
3
+ ################################
4
+
5
+
6
def get_voms(
    reqd_hrs: int=5
):
    """
    A simple function that checks if the user's VOMS proxy is valid for a minimal amount of time and returns the path of the proxy file.

    Optional arguments:
        - reqd_hrs: <int> Number of hours proxy is required to be valid for

    Returns the last line printed by `voms-proxy-info -p` (the proxy file path).

    Raises RuntimeError if `voms-proxy-info` reports failure (non-zero exit
    status) for either the validity check or the path query, or if no path
    is printed.
    """

    # getstatusoutput never raises on a failing command, so the exit status
    # has to be inspected explicitly.
    stat, out = subprocess.getstatusoutput(f"voms-proxy-info -e --valid {reqd_hrs}:00")
    if stat != 0:
        raise RuntimeError(f"ERROR: voms proxy not found or validity less than {reqd_hrs} hours:\n{out}")

    stat, out = subprocess.getstatusoutput("voms-proxy-info -p")
    # Only the last line is kept, in case the tool prints extra lines before the path.
    proxy = out.strip().split("\n")[-1]
    if stat != 0 or not proxy:
        raise RuntimeError(f"ERROR: Unable to find voms proxy:\n{out}")

    return proxy
@@ -0,0 +1,19 @@
1
+ Metadata-Version: 2.4
2
+ Name: eos-utils
3
+ Version: 0.0.1
4
+ Summary: A small package to hold useful EOS tools.
5
+ Author-email: Thomas Sievert <63161166+tcoulvert@users.noreply.github.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/tcoulvert/eos-utils
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.6
11
+ Description-Content-Type: text/markdown
12
+
13
+ # EOS-Utils
14
+
15
+ A small project to hold some useful Python scripts for working with EOS and the CERN distributed computing cluster systems. A short description of the various files is given below.
16
+
17
+ File copying:
18
+ * `copy_eos.py` - Copies files (the source can be a single file or a directory) from a source path (can be local or remote, can include an EOS redirector) to a target path (can be local or remote, can include a redirector). There are optional features such as filepath grepping and filetype restriction. See the arguments of the script for details.
19
+ * `get_voms.py` - Checks if the user's VOMS proxy is valid for a minimal number of hours, minimal time can be passed as an argument.
@@ -0,0 +1,7 @@
1
+ eos-utils/__init__.py,sha256=5g3RpxFrbqc_iXM8a7XgY8qSHDgUaDNsoDh7I3DEGoI,59
2
+ eos-utils/copy_eos.py,sha256=3IaiquiqRMuLxidRBHfRDM8rdweVNSquGygfkiLA-Eg,8367
3
+ eos-utils/get_voms.py,sha256=7v20IZ3cPQc0oyByyOHMnTN6fbAHhM8P02zVxx6WFF0,764
4
+ eos_utils-0.0.1.dist-info/METADATA,sha256=Gu310FwBghRLB51ZoaYHHqN1qC-s8JS8GcaXqlfM704,1106
5
+ eos_utils-0.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ eos_utils-0.0.1.dist-info/top_level.txt,sha256=rluZpU9dhNEOEocEB09Go49QG562bZZyA_ou-HvbtX0,10
7
+ eos_utils-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ eos-utils