opex-manifest-generator 1.2.2__py3-none-any.whl → 1.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opex_manifest_generator/cli.py +19 -7
- opex_manifest_generator/common.py +20 -5
- opex_manifest_generator/hash.py +41 -10
- opex_manifest_generator/opex_manifest.py +116 -66
- opex_manifest_generator-1.2.3.dist-info/METADATA +527 -0
- opex_manifest_generator-1.2.3.dist-info/RECORD +16 -0
- {opex_manifest_generator-1.2.2.dist-info → opex_manifest_generator-1.2.3.dist-info}/WHEEL +1 -1
- opex_manifest_generator/samples/Opex.xml +0 -11
- opex_manifest_generator/samples/opex_manifest_generator_AutoClass.xlsx +0 -0
- opex_manifest_generator/samples/spreads/dctemplate.xlsx +0 -0
- opex_manifest_generator/samples/spreads/eadtemplate.xlsx +0 -0
- opex_manifest_generator/samples/spreads/gdprtemplate.xlsx +0 -0
- opex_manifest_generator/samples/spreads/modstemplate.xlsx +0 -0
- opex_manifest_generator-1.2.2.dist-info/METADATA +0 -400
- opex_manifest_generator-1.2.2.dist-info/RECORD +0 -22
- {opex_manifest_generator-1.2.2.dist-info → opex_manifest_generator-1.2.3.dist-info}/entry_points.txt +0 -0
- {opex_manifest_generator-1.2.2.dist-info → opex_manifest_generator-1.2.3.dist-info/licenses}/LICENSE.md +0 -0
- {opex_manifest_generator-1.2.2.dist-info → opex_manifest_generator-1.2.3.dist-info}/top_level.txt +0 -0
opex_manifest_generator/cli.py
CHANGED
|
@@ -30,9 +30,11 @@ def parse_args():
|
|
|
30
30
|
help= """Assign a prefix when utilising the --autoclass option. Prefix will append any text before all generated text.
|
|
31
31
|
When utilising the {both} option fill in like: [catalog-prefix, accession-prefix] without square brackets.
|
|
32
32
|
""")
|
|
33
|
-
parser.add_argument("-fx", "--fixity", required = False,
|
|
34
|
-
|
|
35
|
-
help="Generates a hash for each file and adds it to the opex, can select
|
|
33
|
+
parser.add_argument("-fx", "--fixity", required = False, nargs = '*', default = None,
|
|
34
|
+
choices = ['NONE', 'SHA-1', 'MD5', 'SHA-256', 'SHA-512'], type = str.upper, action=EmptyIsTrueFixity,
|
|
35
|
+
help="Generates a hash for each file and adds it to the opex, can select one or more algorithms to utilise. -fx SHA-1 MD5")
|
|
36
|
+
parser.add_argument("--pax-fixity", required = False, action = 'store_true', default = False,
|
|
37
|
+
help="Enables use of PAX fixity generation, in line with Preservica's Recommendation. Files / folders ending in .pax or .pax.zip will have individual files in folder / zip added to Opex.")
|
|
36
38
|
parser.add_argument("-rme", "--remove-empty", required = False, action = 'store_true', default = False,
|
|
37
39
|
help = "Remove and log empty directories from root. Log will be exported to 'meta' / output folder.")
|
|
38
40
|
parser.add_argument("-o", "--output", required = False, nargs = 1,
|
|
@@ -92,11 +94,14 @@ def run_cli():
|
|
|
92
94
|
if args.input and args.autoclass:
|
|
93
95
|
print(f'Both Input and Auto-Class options have been selected, please use only one...')
|
|
94
96
|
time.sleep(5); raise SystemExit()
|
|
97
|
+
if args.remove and not args.input:
|
|
98
|
+
print('Removal flag has been given without input, please ensure an input file is utilised when using this option.')
|
|
99
|
+
time.sleep(5); raise SystemExit()
|
|
95
100
|
if not args.metadata in {'none', 'n'} and not args.input:
|
|
96
101
|
print(f'Warning: Metadata Flag has been given without Input. Metadata won\'t be generated.')
|
|
97
102
|
time.sleep(5)
|
|
98
103
|
if args.print_xmls:
|
|
99
|
-
OpexManifestGenerator.print_descriptive_xmls()
|
|
104
|
+
OpexManifestGenerator(root = args.root).print_descriptive_xmls()
|
|
100
105
|
acc_prefix = None
|
|
101
106
|
if args.autoclass in {"accession", "a", "accession-generic", "ag", "both", "b", "both-generic", "bg"} and args.accession_mode is None:
|
|
102
107
|
args.accession_mode = "file"
|
|
@@ -159,9 +164,10 @@ def run_cli():
|
|
|
159
164
|
accession_mode=args.accession_mode,
|
|
160
165
|
acc_prefix = acc_prefix,
|
|
161
166
|
empty_flag = args.remove_empty,
|
|
162
|
-
|
|
167
|
+
removal_flag = args.remove,
|
|
163
168
|
clear_opex_flag = args.clear_opex,
|
|
164
|
-
algorithm = args.fixity,
|
|
169
|
+
algorithm = args.fixity,
|
|
170
|
+
pax_fixity= args.pax_fixity,
|
|
165
171
|
startref = args.start_ref,
|
|
166
172
|
export_flag = args.export,
|
|
167
173
|
meta_dir_flag = args.disable_meta_dir,
|
|
@@ -178,6 +184,12 @@ def run_cli():
|
|
|
178
184
|
sort_key = sort_key,
|
|
179
185
|
delimiter = args.delimiter,
|
|
180
186
|
keywords_abbreviation_number = args.keywords_abbreviation_number).main()
|
|
181
|
-
|
|
187
|
+
|
|
188
|
+
class EmptyIsTrueFixity(argparse.Action):
|
|
189
|
+
def __call__(self, parser, namespace, values, option_string=None):
|
|
190
|
+
if len(values) == 0:
|
|
191
|
+
values = ["SHA-1"]
|
|
192
|
+
setattr(namespace, self.dest, values)
|
|
193
|
+
|
|
182
194
|
if __name__ == "__main__":
|
|
183
195
|
run_cli()
|
|
@@ -5,8 +5,7 @@ author: Christopher Prince
|
|
|
5
5
|
license: Apache License 2.0"
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import zipfile, os, sys, time, stat
|
|
9
|
-
import datetime
|
|
8
|
+
import zipfile, os, sys, time, stat, datetime, shutil
|
|
10
9
|
from lxml import etree
|
|
11
10
|
|
|
12
11
|
def zip_opex(file_path,opex_path):
|
|
@@ -17,10 +16,26 @@ def zip_opex(file_path,opex_path):
|
|
|
17
16
|
z.write(opex_path,os.path.basename(opex_path))
|
|
18
17
|
else: print(f'A zip file already exists for: {zip_file}')
|
|
19
18
|
|
|
20
|
-
def
|
|
19
|
+
def remove_tree(path: str, removed_list: list):
|
|
20
|
+
removed_list.append(path)
|
|
21
|
+
print(f"Removing: {path}")
|
|
22
|
+
if os.path.isdir(path):
|
|
23
|
+
for dp,d,f in os.walk(path):
|
|
24
|
+
for fn in f:
|
|
25
|
+
removed_list.append(win_256_check(dp+win_path_delimiter()+fn))
|
|
26
|
+
for dn in d:
|
|
27
|
+
removed_list.append(win_256_check(dp+win_path_delimiter()+dn))
|
|
28
|
+
shutil.rmtree(path)
|
|
29
|
+
else:
|
|
30
|
+
if os.path.exists(path):
|
|
31
|
+
os.remove(path)
|
|
32
|
+
|
|
33
|
+
def win_256_check(path):
|
|
21
34
|
if len(path) > 255 and sys.platform == "win32":
|
|
22
|
-
if path.startswith(u
|
|
23
|
-
|
|
35
|
+
if path.startswith(u"\\\\?\\"):
|
|
36
|
+
path = path
|
|
37
|
+
else:
|
|
38
|
+
path = u"\\\\?\\" + path
|
|
24
39
|
return path
|
|
25
40
|
|
|
26
41
|
def filter_win_hidden(path: str):
|
opex_manifest_generator/hash.py
CHANGED
|
@@ -6,13 +6,16 @@ license: Apache License 2.0"
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import hashlib
|
|
9
|
+
from opex_manifest_generator.common import win_256_check
|
|
10
|
+
import os, io
|
|
9
11
|
|
|
10
12
|
class HashGenerator():
|
|
11
|
-
def __init__(self,algorithm="SHA-1"):
|
|
13
|
+
def __init__(self, algorithm: str = "SHA-1", buffer: int = 4096):
|
|
12
14
|
self.algorithm = algorithm
|
|
13
|
-
self.buffer =
|
|
15
|
+
self.buffer = buffer
|
|
14
16
|
|
|
15
|
-
def hash_generator(self,file_path: str):
|
|
17
|
+
def hash_generator(self, file_path: str):
|
|
18
|
+
file_path = win_256_check(file_path)
|
|
16
19
|
if self.algorithm == "SHA-1":
|
|
17
20
|
hash = hashlib.sha1()
|
|
18
21
|
elif self.algorithm == "MD5":
|
|
@@ -24,11 +27,39 @@ class HashGenerator():
|
|
|
24
27
|
else:
|
|
25
28
|
hash = hashlib.sha1()
|
|
26
29
|
print(f'Generating Fixity using {self.algorithm} for: {file_path}')
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
30
|
+
try:
|
|
31
|
+
with open(file_path, 'rb', buffering = 0) as f:
|
|
32
|
+
while True:
|
|
33
|
+
buff = f.read(self.buffer)
|
|
34
|
+
if not buff:
|
|
35
|
+
break
|
|
36
|
+
hash.update(buff)
|
|
37
|
+
f.close()
|
|
38
|
+
except Exception as e:
|
|
39
|
+
print(e)
|
|
40
|
+
raise SystemError()
|
|
41
|
+
return hash.hexdigest().upper()
|
|
42
|
+
|
|
43
|
+
def hash_generator_pax_zip(self, filename, z):
|
|
44
|
+
if self.algorithm == "SHA-1":
|
|
45
|
+
hash = hashlib.sha1()
|
|
46
|
+
elif self.algorithm == "MD5":
|
|
47
|
+
hash = hashlib.md5()
|
|
48
|
+
elif self.algorithm == "SHA-256":
|
|
49
|
+
hash = hashlib.sha256()
|
|
50
|
+
elif self.algorithm == "SHA-512":
|
|
51
|
+
hash = hashlib.sha512()
|
|
52
|
+
else:
|
|
53
|
+
hash = hashlib.sha1()
|
|
54
|
+
print(f'Generating Fixity using {self.algorithm} for: {filename}')
|
|
55
|
+
try:
|
|
56
|
+
with z.open(filename, 'r') as data:
|
|
57
|
+
while True:
|
|
58
|
+
buff = data.read(self.buffer)
|
|
59
|
+
if not buff:
|
|
60
|
+
break
|
|
61
|
+
hash.update(buff)
|
|
62
|
+
except Exception as e:
|
|
63
|
+
print(e)
|
|
64
|
+
raise SystemError()
|
|
34
65
|
return hash.hexdigest().upper()
|
|
@@ -8,11 +8,10 @@ license: Apache License 2.0"
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import lxml.etree as ET
|
|
11
|
-
import
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import os, time, datetime
|
|
12
13
|
from auto_classification_generator import ClassificationGenerator
|
|
13
14
|
from auto_classification_generator.common import export_list_txt, export_xl, export_csv, define_output_file
|
|
14
|
-
import datetime
|
|
15
|
-
import pandas as pd
|
|
16
15
|
from pandas.api.types import is_datetime64_any_dtype
|
|
17
16
|
from opex_manifest_generator.hash import HashGenerator
|
|
18
17
|
from opex_manifest_generator.common import *
|
|
@@ -34,7 +33,7 @@ class OpexManifestGenerator():
|
|
|
34
33
|
:param startref: set to set the starting reference number
|
|
35
34
|
:param algorithm: set whether to generate fixities and the algorithm to use {MD5, SHA-1, SHA-256, SHA-512}
|
|
36
35
|
:param empty_flag: set whether to delete and log empty directories
|
|
37
|
-
:param
|
|
36
|
+
:param removal_flag: set whether to enable removals; data must also contain removals column and cell be set to True
|
|
38
37
|
:param clear_opex_flag: set whether clear existing opexes
|
|
39
38
|
:param export_flag: set whether to export the spreadsheet when using autoclass
|
|
40
39
|
:param output_format: set output format when using autoclass {xlsx, csv}
|
|
@@ -60,9 +59,10 @@ class OpexManifestGenerator():
|
|
|
60
59
|
acc_prefix: str = None,
|
|
61
60
|
accession_mode: str = False,
|
|
62
61
|
startref: int = 1,
|
|
63
|
-
algorithm: str = None,
|
|
62
|
+
algorithm: list[str] = None,
|
|
63
|
+
pax_fixity: bool = False,
|
|
64
64
|
empty_flag: bool = False,
|
|
65
|
-
|
|
65
|
+
removal_flag: bool = False,
|
|
66
66
|
clear_opex_flag: bool = False,
|
|
67
67
|
export_flag: bool = False,
|
|
68
68
|
input: str = None,
|
|
@@ -84,10 +84,11 @@ class OpexManifestGenerator():
|
|
|
84
84
|
self.list_fixity = []
|
|
85
85
|
self.start_time = datetime.datetime.now()
|
|
86
86
|
self.algorithm = algorithm
|
|
87
|
+
self.pax_fixity_flag = pax_fixity
|
|
87
88
|
self.empty_flag = empty_flag
|
|
88
|
-
self.
|
|
89
|
-
if self.
|
|
90
|
-
self.
|
|
89
|
+
self.removal_flag = removal_flag
|
|
90
|
+
if self.removal_flag:
|
|
91
|
+
self.removal_list = []
|
|
91
92
|
self.export_flag = export_flag
|
|
92
93
|
self.startref = startref
|
|
93
94
|
self.autoclass_flag = autoclass_flag
|
|
@@ -153,23 +154,26 @@ class OpexManifestGenerator():
|
|
|
153
154
|
root_element = ET.QName(xml_file.find('.'))
|
|
154
155
|
root_element_ln = root_element.localname
|
|
155
156
|
for elem in xml_file.findall(".//"):
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
157
|
+
if elem.getchildren():
|
|
158
|
+
pass
|
|
159
|
+
else:
|
|
160
|
+
elem_path = xml_file.getelementpath(elem)
|
|
161
|
+
elem = ET.QName(elem)
|
|
162
|
+
elem_lnpath = elem_path.replace(f"{{{elem.namespace}}}", root_element_ln + ":")
|
|
163
|
+
print(elem_lnpath)
|
|
160
164
|
|
|
161
165
|
def set_input_flags(self):
|
|
162
|
-
if
|
|
166
|
+
if TITLE_FIELD in self.column_headers:
|
|
163
167
|
self.title_flag = True
|
|
164
|
-
if
|
|
168
|
+
if DESCRIPTION_FIELD in self.column_headers:
|
|
165
169
|
self.description_flag = True
|
|
166
|
-
if
|
|
170
|
+
if SECUIRTY_FIELD in self.column_headers:
|
|
167
171
|
self.security_flag = True
|
|
168
|
-
if
|
|
172
|
+
if SOURCEID_FIELD in self.column_headers:
|
|
169
173
|
self.sourceid_flag = True
|
|
170
|
-
if
|
|
174
|
+
if IGNORE_FIELD in self.column_headers:
|
|
171
175
|
self.ignore_flag = True
|
|
172
|
-
if
|
|
176
|
+
if HASH_FIELD in self.column_headers and ALGORITHM_FIELD in self.column_headers:
|
|
173
177
|
self.hash_from_spread = True
|
|
174
178
|
print("Hash detected in Spreadsheet; taking hashes from spreadsheet")
|
|
175
179
|
time.sleep(3)
|
|
@@ -243,32 +247,20 @@ class OpexManifestGenerator():
|
|
|
243
247
|
print('Error Looking up XIP Metadata')
|
|
244
248
|
print(e)
|
|
245
249
|
|
|
246
|
-
def
|
|
250
|
+
def removal_df_lookup(self, idx: pd.Index):
|
|
247
251
|
try:
|
|
248
252
|
if idx.empty:
|
|
249
253
|
return False
|
|
250
254
|
else:
|
|
251
255
|
remove = check_nan(self.df[REMOVAL_FIELD].loc[idx].item())
|
|
252
256
|
if remove is not None:
|
|
253
|
-
removed_list.append(path)
|
|
254
|
-
print(f"Removing: {path}")
|
|
255
|
-
if os.path.isdir(path):
|
|
256
|
-
for dp,d,f in os.walk(path):
|
|
257
|
-
for fn in f:
|
|
258
|
-
removed_list.append(win_256_check(dp+win_path_delimiter()+fn))
|
|
259
|
-
for dn in d:
|
|
260
|
-
removed_list.append(win_256_check(dp+win_path_delimiter()+dn))
|
|
261
|
-
shutil.rmtree(path)
|
|
262
|
-
else:
|
|
263
|
-
if os.path.exists(path):
|
|
264
|
-
os.remove(path)
|
|
265
257
|
return True
|
|
266
258
|
else:
|
|
267
259
|
return False
|
|
268
260
|
except Exception as e:
|
|
269
261
|
print('Error looking up Removals')
|
|
270
262
|
print(e)
|
|
271
|
-
|
|
263
|
+
|
|
272
264
|
def ignore_df_lookup(self, idx: pd.Index):
|
|
273
265
|
try:
|
|
274
266
|
if idx.empty:
|
|
@@ -298,11 +290,12 @@ class OpexManifestGenerator():
|
|
|
298
290
|
if idx.empty:
|
|
299
291
|
pass
|
|
300
292
|
else:
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
293
|
+
for algorithm_type in self.algorithm:
|
|
294
|
+
self.fixity = ET.SubElement(xml_fixities,f"{{{self.opexns}}}Fixity")
|
|
295
|
+
self.hash = self.df[HASH_FIELD].loc[idx].item()
|
|
296
|
+
self.algorithm = self.df[ALGORITHM_FIELD].loc[idx].item()
|
|
297
|
+
self.fixity.set('type', algorithm_type)
|
|
298
|
+
self.fixity.set('value',self.hash)
|
|
306
299
|
except Exception as e:
|
|
307
300
|
print('Error looking up Hash')
|
|
308
301
|
print(e)
|
|
@@ -386,13 +379,15 @@ class OpexManifestGenerator():
|
|
|
386
379
|
ns = elem_dict.get('Namespace')
|
|
387
380
|
try:
|
|
388
381
|
if self.metadata_flag in {'e', 'exact'}:
|
|
389
|
-
|
|
382
|
+
val_series = self.df[path].loc[idx]
|
|
383
|
+
val = check_nan(val_series.item())
|
|
390
384
|
elif self.metadata_flag in {'f', 'flat'}:
|
|
391
|
-
|
|
385
|
+
val_series = self.df[name].loc[idx]
|
|
386
|
+
val = check_nan(val_series.item())
|
|
392
387
|
if val is None:
|
|
393
388
|
continue
|
|
394
389
|
else:
|
|
395
|
-
if is_datetime64_any_dtype(
|
|
390
|
+
if is_datetime64_any_dtype(val_series):
|
|
396
391
|
val = pd.to_datetime(val)
|
|
397
392
|
val = datetime.datetime.strftime(val, "%Y-%m-%dT%H:%M:%S.000Z")
|
|
398
393
|
if self.metadata_flag in {'e','exact'}:
|
|
@@ -435,13 +430,26 @@ class OpexManifestGenerator():
|
|
|
435
430
|
if self.properties is None:
|
|
436
431
|
xmlroot.remove(self.properties)
|
|
437
432
|
|
|
438
|
-
def
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
433
|
+
def generate_opex_fixity(self, file_path: str):
|
|
434
|
+
for algorithm_type in self.OMG.algorithm:
|
|
435
|
+
self.fixity = ET.SubElement(self.fixities, f"{{{self.opexns}}}Fixity")
|
|
436
|
+
self.hash = HashGenerator(algorithm = algorithm_type).hash_generator(file_path)
|
|
437
|
+
self.fixity.set("type", algorithm_type)
|
|
438
|
+
self.fixity.set("value", self.hash)
|
|
439
|
+
self.OMG.list_fixity.append([algorithm_type, self.hash, file_path])
|
|
440
|
+
self.OMG.list_path.append(file_path)
|
|
441
|
+
|
|
442
|
+
def generate_pax_zip_opex_fixity(self, file_path):
|
|
443
|
+
for algorithm_type in self.OMG.algorithm:
|
|
444
|
+
z = zipfile.ZipFile(file_path,'r')
|
|
445
|
+
for file in z.filelist:
|
|
446
|
+
self.fixity = ET.SubElement(self.fixities, f"{{{self.opexns}}}Fixity")
|
|
447
|
+
self.hash = HashGenerator(algorithm = algorithm_type).hash_generator_pax_zip(file.filename, z)
|
|
448
|
+
self.fixity.set("path", file.filename)
|
|
449
|
+
self.fixity.set("type", algorithm_type)
|
|
450
|
+
self.fixity.set("value", self.hash)
|
|
451
|
+
self.OMG.list_fixity.append([algorithm_type, self.hash, file_path + file.filename])
|
|
452
|
+
self.OMG.list_path.append(file_path)
|
|
445
453
|
|
|
446
454
|
def main(self):
|
|
447
455
|
if self.print_xmls_flag:
|
|
@@ -468,9 +476,9 @@ class OpexManifestGenerator():
|
|
|
468
476
|
if self.algorithm:
|
|
469
477
|
output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Fixities", output_format = "txt")
|
|
470
478
|
export_list_txt(self.list_fixity, output_path)
|
|
471
|
-
if self.
|
|
472
|
-
output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "
|
|
473
|
-
export_list_txt(self.
|
|
479
|
+
if self.removal_flag:
|
|
480
|
+
output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Removals", output_format = "txt")
|
|
481
|
+
export_list_txt(self.removal_list, output_path)
|
|
474
482
|
print_running_time(self.start_time)
|
|
475
483
|
|
|
476
484
|
class OpexDir(OpexManifestGenerator):
|
|
@@ -482,11 +490,10 @@ class OpexDir(OpexManifestGenerator):
|
|
|
482
490
|
self.folder_path = folder_path.replace(u'\\\\?\\', "")
|
|
483
491
|
else:
|
|
484
492
|
self.folder_path = folder_path
|
|
485
|
-
print(self.folder_path)
|
|
486
493
|
if any([self.OMG.input,
|
|
487
494
|
self.OMG.autoclass_flag in {"c","catalog","a","accession","b","both","cg","catalog-generic","ag","accession-generic","bg","both-generic"},
|
|
488
495
|
self.OMG.ignore_flag,
|
|
489
|
-
self.OMG.
|
|
496
|
+
self.OMG.removal_flag,
|
|
490
497
|
self.OMG.sourceid_flag,
|
|
491
498
|
self.OMG.title_flag,
|
|
492
499
|
self.OMG.description_flag,
|
|
@@ -502,16 +509,16 @@ class OpexDir(OpexManifestGenerator):
|
|
|
502
509
|
self.ignore = self.OMG.ignore_df_lookup(index)
|
|
503
510
|
if self.ignore:
|
|
504
511
|
return
|
|
505
|
-
if self.OMG.
|
|
506
|
-
self.removal = self.OMG.
|
|
512
|
+
if self.OMG.removal_flag:
|
|
513
|
+
self.removal = self.OMG.removal_df_lookup(index)
|
|
507
514
|
if self.removal:
|
|
515
|
+
remove_tree(self.folder_path, self.OMG.removal_list)
|
|
508
516
|
return
|
|
509
517
|
self.xmlroot = ET.Element(f"{{{self.opexns}}}OPEXMetadata", nsmap={"opex":self.opexns})
|
|
510
518
|
self.transfer = ET.SubElement(self.xmlroot, f"{{{self.opexns}}}Transfer")
|
|
511
519
|
self.manifest = ET.SubElement(self.transfer, f"{{{self.opexns}}}Manifest")
|
|
512
520
|
self.folders = ET.SubElement(self.manifest, f"{{{self.opexns}}}Folders")
|
|
513
521
|
self.files = ET.SubElement(self.manifest, f"{{{self.opexns}}}Files")
|
|
514
|
-
|
|
515
522
|
if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
|
|
516
523
|
self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
|
|
517
524
|
elif self.OMG.autoclass_flag in {"generic", "g", "catalog-generic", "cg", "accession-generic", "ag", "both-generic", "bg"}:
|
|
@@ -524,6 +531,19 @@ class OpexDir(OpexManifestGenerator):
|
|
|
524
531
|
self.security = security
|
|
525
532
|
if self.OMG.sourceid_flag:
|
|
526
533
|
self.OMG.sourceid_df_lookup(self.transfer, self.folder_path, index)
|
|
534
|
+
if self.OMG.algorithm and self.OMG.pax_fixity_flag is True and self.folder_path.endswith(".pax"):
|
|
535
|
+
self.fixities = ET.SubElement(self.transfer, f"{{{self.opexns}}}Fixities")
|
|
536
|
+
for dir,_,files in os.walk(folder_path):
|
|
537
|
+
for filename in files:
|
|
538
|
+
rel_path = os.path.relpath(dir,folder_path)
|
|
539
|
+
rel_file = os.path.join(rel_path, filename)
|
|
540
|
+
abs_file = os.path.abspath(os.path.join(dir,filename))
|
|
541
|
+
self.generate_opex_fixity(abs_file)
|
|
542
|
+
self.fixity.set("path",rel_file)
|
|
543
|
+
file = ET.SubElement(self.files, f"{{{self.opexns}}}File")
|
|
544
|
+
file.set("type", "content")
|
|
545
|
+
file.set("size", str(os.path.getsize(abs_file)))
|
|
546
|
+
file.text = str(rel_file)
|
|
527
547
|
if self.OMG.autoclass_flag or self.OMG.input:
|
|
528
548
|
self.OMG.generate_opex_properties(self.xmlroot, index,
|
|
529
549
|
title = self.title,
|
|
@@ -554,27 +574,53 @@ class OpexDir(OpexManifestGenerator):
|
|
|
554
574
|
raise SystemError()
|
|
555
575
|
|
|
556
576
|
def generate_opex_dirs(self, path: str):
|
|
577
|
+
""""
|
|
578
|
+
This function loops recursively through a given directory.
|
|
579
|
+
|
|
580
|
+
There are two loops to first generate Opexes for Files;
|
|
581
|
+
"""
|
|
557
582
|
self = OpexDir(self.OMG, path)
|
|
558
|
-
|
|
583
|
+
if self.OMG.algorithm and self.OMG.pax_fixity_flag is True and self.folder_path.endswith(".pax"):
|
|
584
|
+
opex_path = os.path.abspath(self.folder_path)
|
|
585
|
+
else:
|
|
586
|
+
opex_path = os.path.join(os.path.abspath(self.folder_path), os.path.basename(self.folder_path))
|
|
587
|
+
#First Loop to Generate Folder Manifest Opexes & Individual File Opexes.
|
|
559
588
|
if self.removal is True:
|
|
589
|
+
#If removal is True for Folder, then it will be removed - Does not need to descend.
|
|
560
590
|
pass
|
|
561
591
|
else:
|
|
562
592
|
for f_path in self.filter_directories(path):
|
|
563
593
|
if f_path.endswith('.opex'):
|
|
594
|
+
#Ignores OPEX files / directories...
|
|
564
595
|
pass
|
|
565
596
|
elif os.path.isdir(f_path):
|
|
566
|
-
if self.ignore is True
|
|
597
|
+
if self.ignore is True or \
|
|
598
|
+
(self.OMG.removal_flag is True and \
|
|
599
|
+
self.OMG.removal_df_lookup(self.OMG.index_df_lookup(f_path)) is True):
|
|
600
|
+
#If Ignore is True, or the Folder below is marked for Removal: Don't add to Opex
|
|
567
601
|
pass
|
|
568
602
|
else:
|
|
603
|
+
#Add Folder to OPEX Manifest (doesn't get written yet...)
|
|
569
604
|
self.folder = ET.SubElement(self.folders, f"{{{self.opexns}}}Folder")
|
|
570
605
|
self.folder.text = str(os.path.basename(f_path))
|
|
571
|
-
self.
|
|
606
|
+
if self.OMG.algorithm and self.OMG.pax_fixity_flag is True and self.folder_path.endswith(".pax"):
|
|
607
|
+
#If using fixity, but the folder is a PAX & using PAX Fixity: End descent.
|
|
608
|
+
pass
|
|
609
|
+
else:
|
|
610
|
+
#Recurse Descent.
|
|
611
|
+
self.generate_opex_dirs(f_path)
|
|
612
|
+
elif os.path.isfile(f_path):
|
|
613
|
+
#Processes OPEXes for individual Files: this gets written.
|
|
614
|
+
OpexFile(self.OMG, f_path)
|
|
572
615
|
else:
|
|
573
|
-
|
|
616
|
+
print('Unknown File Type?')
|
|
617
|
+
pass
|
|
618
|
+
#Second Loop to add previously generated Opexes to Folder Manifest.
|
|
574
619
|
if self.removal is True or self.ignore is True:
|
|
575
620
|
pass
|
|
576
621
|
else:
|
|
577
622
|
if check_opex(opex_path):
|
|
623
|
+
#Only processing Opexes.
|
|
578
624
|
for f_path in self.filter_directories(path):
|
|
579
625
|
if os.path.isfile(f_path):
|
|
580
626
|
file = ET.SubElement(self.files, f"{{{self.opexns}}}File")
|
|
@@ -584,12 +630,14 @@ class OpexDir(OpexManifestGenerator):
|
|
|
584
630
|
file.set("type", "content")
|
|
585
631
|
file.set("size", str(os.path.getsize(f_path)))
|
|
586
632
|
file.text = str(os.path.basename(f_path))
|
|
633
|
+
#Writes Folder OPEX
|
|
587
634
|
write_opex(opex_path, self.xmlroot)
|
|
588
635
|
else:
|
|
636
|
+
#Avoids Override if exists, lets you continue where left off.
|
|
589
637
|
print(f"Avoiding override, Opex exists at: {opex_path}")
|
|
590
638
|
|
|
591
639
|
class OpexFile(OpexManifestGenerator):
|
|
592
|
-
def __init__(self, OMG: OpexManifestGenerator, file_path: str,
|
|
640
|
+
def __init__(self, OMG: OpexManifestGenerator, file_path: str, title: str = None, description: str = None, security: str = None):
|
|
593
641
|
self.OMG = OMG
|
|
594
642
|
self.opexns = self.OMG.opexns
|
|
595
643
|
if file_path.startswith(u'\\\\?\\'):
|
|
@@ -600,7 +648,7 @@ class OpexFile(OpexManifestGenerator):
|
|
|
600
648
|
if any([self.OMG.input,
|
|
601
649
|
self.OMG.autoclass_flag in {"c","catalog","a","accession","b","both","cg","catalog-generic","ag","accession-generic","bg","both-generic"},
|
|
602
650
|
self.OMG.ignore_flag,
|
|
603
|
-
self.OMG.
|
|
651
|
+
self.OMG.removal_flag,
|
|
604
652
|
self.OMG.sourceid_flag,
|
|
605
653
|
self.OMG.title_flag,
|
|
606
654
|
self.OMG.description_flag,
|
|
@@ -616,11 +664,10 @@ class OpexFile(OpexManifestGenerator):
|
|
|
616
664
|
self.ignore = self.OMG.ignore_df_lookup(index)
|
|
617
665
|
if self.ignore:
|
|
618
666
|
return
|
|
619
|
-
if self.OMG.
|
|
620
|
-
self.removal = self.OMG.
|
|
667
|
+
if self.OMG.removal_flag:
|
|
668
|
+
self.removal = self.OMG.removal_df_lookup(index)
|
|
621
669
|
if self.removal:
|
|
622
670
|
return
|
|
623
|
-
self.algorithm = algorithm
|
|
624
671
|
if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
|
|
625
672
|
self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
|
|
626
673
|
elif self.OMG.autoclass_flag in {"generic", "g", "catalog-generic", "cg", "accession-generic", "ag", "both-generic", "bg"}:
|
|
@@ -641,7 +688,10 @@ class OpexFile(OpexManifestGenerator):
|
|
|
641
688
|
if self.OMG.hash_from_spread:
|
|
642
689
|
self.OMG.hash_df_lookup(self.fixities, index)
|
|
643
690
|
else:
|
|
644
|
-
self.
|
|
691
|
+
if self.OMG.pax_fixity_flag is True and (self.file_path.endswith("pax.zip") or self.file_path.endswith(".pax")):
|
|
692
|
+
self.generate_pax_zip_opex_fixity(self.file_path)
|
|
693
|
+
else:
|
|
694
|
+
self.generate_opex_fixity(self.file_path)
|
|
645
695
|
if self.transfer is None:
|
|
646
696
|
self.xmlroot.remove(self.transfer)
|
|
647
697
|
if self.OMG.autoclass_flag or self.OMG.input:
|