opex-manifest-generator 1.2.2__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,9 +30,11 @@ def parse_args():
30
30
  help= """Assign a prefix when utilising the --autoclass option. Prefix will append any text before all generated text.
31
31
  When utilising the {both} option fill in like: [catalog-prefix, accession-prefix] without square brackets.
32
32
  """)
33
- parser.add_argument("-fx", "--fixity", required = False, const = "SHA-1", default = None,
34
- nargs = '?', choices = ['NONE', 'SHA-1', 'MD5', 'SHA-256', 'SHA-512'], type = str.upper,
35
- help="Generates a hash for each file and adds it to the opex, can select the algorithm to utilise.")
33
+ parser.add_argument("-fx", "--fixity", required = False, nargs = '*', default = None,
34
+ choices = ['NONE', 'SHA-1', 'MD5', 'SHA-256', 'SHA-512'], type = str.upper, action=EmptyIsTrueFixity,
35
+ help="Generates a hash for each file and adds it to the opex, can select one or more algorithms to utilise. -fx SHA-1 MD5")
36
+ parser.add_argument("--pax-fixity", required = False, action = 'store_true', default = False,
37
+ help="Enables use of PAX fixity generation, in line with Preservica's Recommendation. Files / folders ending in .pax or .pax.zip will have individual files in folder / zip added to Opex.")
36
38
  parser.add_argument("-rme", "--remove-empty", required = False, action = 'store_true', default = False,
37
39
  help = "Remove and log empty directories from root. Log will be exported to 'meta' / output folder.")
38
40
  parser.add_argument("-o", "--output", required = False, nargs = 1,
@@ -92,11 +94,14 @@ def run_cli():
92
94
  if args.input and args.autoclass:
93
95
  print(f'Both Input and Auto-Class options have been selected, please use only one...')
94
96
  time.sleep(5); raise SystemExit()
97
+ if args.remove and not args.input:
98
+ print('Removal flag has been given without input, please ensure an input file is utilised when using this option.')
99
+ time.sleep(5); raise SystemExit()
95
100
  if not args.metadata in {'none', 'n'} and not args.input:
96
101
  print(f'Warning: Metadata Flag has been given without Input. Metadata won\'t be generated.')
97
102
  time.sleep(5)
98
103
  if args.print_xmls:
99
- OpexManifestGenerator.print_descriptive_xmls()
104
+ OpexManifestGenerator(root = args.root).print_descriptive_xmls()
100
105
  acc_prefix = None
101
106
  if args.autoclass in {"accession", "a", "accession-generic", "ag", "both", "b", "both-generic", "bg"} and args.accession_mode is None:
102
107
  args.accession_mode = "file"
@@ -159,9 +164,10 @@ def run_cli():
159
164
  accession_mode=args.accession_mode,
160
165
  acc_prefix = acc_prefix,
161
166
  empty_flag = args.remove_empty,
162
- remove_flag = args.remove,
167
+ removal_flag = args.remove,
163
168
  clear_opex_flag = args.clear_opex,
164
- algorithm = args.fixity,
169
+ algorithm = args.fixity,
170
+ pax_fixity= args.pax_fixity,
165
171
  startref = args.start_ref,
166
172
  export_flag = args.export,
167
173
  meta_dir_flag = args.disable_meta_dir,
@@ -178,6 +184,12 @@ def run_cli():
178
184
  sort_key = sort_key,
179
185
  delimiter = args.delimiter,
180
186
  keywords_abbreviation_number = args.keywords_abbreviation_number).main()
181
-
187
+
188
+ class EmptyIsTrueFixity(argparse.Action):
189
+ def __call__(self, parser, namespace, values, option_string=None):
190
+ if len(values) == 0:
191
+ values = ["SHA-1"]
192
+ setattr(namespace, self.dest, values)
193
+
182
194
  if __name__ == "__main__":
183
195
  run_cli()
@@ -5,8 +5,7 @@ author: Christopher Prince
5
5
  license: Apache License 2.0"
6
6
  """
7
7
 
8
- import zipfile, os, sys, time, stat
9
- import datetime
8
+ import zipfile, os, sys, time, stat, datetime, shutil
10
9
  from lxml import etree
11
10
 
12
11
  def zip_opex(file_path,opex_path):
@@ -17,10 +16,26 @@ def zip_opex(file_path,opex_path):
17
16
  z.write(opex_path,os.path.basename(opex_path))
18
17
  else: print(f'A zip file already exists for: {zip_file}')
19
18
 
20
- def win_256_check(path: str):
19
+ def remove_tree(path: str, removed_list: list):
20
+ removed_list.append(path)
21
+ print(f"Removing: {path}")
22
+ if os.path.isdir(path):
23
+ for dp,d,f in os.walk(path):
24
+ for fn in f:
25
+ removed_list.append(win_256_check(dp+win_path_delimiter()+fn))
26
+ for dn in d:
27
+ removed_list.append(win_256_check(dp+win_path_delimiter()+dn))
28
+ shutil.rmtree(path)
29
+ else:
30
+ if os.path.exists(path):
31
+ os.remove(path)
32
+
33
+ def win_256_check(path):
21
34
  if len(path) > 255 and sys.platform == "win32":
22
- if path.startswith(u'\\\\?\\'): path = path
23
- else: path = u"\\\\?\\" + path
35
+ if path.startswith(u"\\\\?\\"):
36
+ path = path
37
+ else:
38
+ path = u"\\\\?\\" + path
24
39
  return path
25
40
 
26
41
  def filter_win_hidden(path: str):
@@ -6,13 +6,16 @@ license: Apache License 2.0"
6
6
  """
7
7
 
8
8
  import hashlib
9
+ from opex_manifest_generator.common import win_256_check
10
+ import os, io
9
11
 
10
12
  class HashGenerator():
11
- def __init__(self,algorithm="SHA-1"):
13
+ def __init__(self, algorithm: str = "SHA-1", buffer: int = 4096):
12
14
  self.algorithm = algorithm
13
- self.buffer = 4096
15
+ self.buffer = buffer
14
16
 
15
- def hash_generator(self,file_path: str):
17
+ def hash_generator(self, file_path: str):
18
+ file_path = win_256_check(file_path)
16
19
  if self.algorithm == "SHA-1":
17
20
  hash = hashlib.sha1()
18
21
  elif self.algorithm == "MD5":
@@ -24,11 +27,39 @@ class HashGenerator():
24
27
  else:
25
28
  hash = hashlib.sha1()
26
29
  print(f'Generating Fixity using {self.algorithm} for: {file_path}')
27
- with open(file_path, "rb") as f:
28
- while True:
29
- buff = f.read(self.buffer)
30
- if not buff:
31
- break
32
- hash.update(buff)
33
- f.close()
30
+ try:
31
+ with open(file_path, 'rb', buffering = 0) as f:
32
+ while True:
33
+ buff = f.read(self.buffer)
34
+ if not buff:
35
+ break
36
+ hash.update(buff)
37
+ f.close()
38
+ except Exception as e:
39
+ print(e)
40
+ raise SystemError()
41
+ return hash.hexdigest().upper()
42
+
43
+ def hash_generator_pax_zip(self, filename, z):
44
+ if self.algorithm == "SHA-1":
45
+ hash = hashlib.sha1()
46
+ elif self.algorithm == "MD5":
47
+ hash = hashlib.md5()
48
+ elif self.algorithm == "SHA-256":
49
+ hash = hashlib.sha256()
50
+ elif self.algorithm == "SHA-512":
51
+ hash = hashlib.sha512()
52
+ else:
53
+ hash = hashlib.sha1()
54
+ print(f'Generating Fixity using {self.algorithm} for: {filename}')
55
+ try:
56
+ with z.open(filename, 'r') as data:
57
+ while True:
58
+ buff = data.read(self.buffer)
59
+ if not buff:
60
+ break
61
+ hash.update(buff)
62
+ except Exception as e:
63
+ print(e)
64
+ raise SystemError()
34
65
  return hash.hexdigest().upper()
@@ -8,11 +8,10 @@ license: Apache License 2.0"
8
8
  """
9
9
 
10
10
  import lxml.etree as ET
11
- import os, time, shutil
11
+ import pandas as pd
12
+ import os, time, datetime
12
13
  from auto_classification_generator import ClassificationGenerator
13
14
  from auto_classification_generator.common import export_list_txt, export_xl, export_csv, define_output_file
14
- import datetime
15
- import pandas as pd
16
15
  from pandas.api.types import is_datetime64_any_dtype
17
16
  from opex_manifest_generator.hash import HashGenerator
18
17
  from opex_manifest_generator.common import *
@@ -34,7 +33,7 @@ class OpexManifestGenerator():
34
33
  :param startref: set the starting reference number
35
34
  :param algorithm: set whether to generate fixities and the algorithm to use {MD5, SHA-1, SHA-256, SHA-512}
36
35
  :param empty_flag: set whether to delete and log empty directories
37
- :param remove_flag: set whether to enable removals; data must also contain removals column and cell be set to True
36
+ :param removal_flag: set whether to enable removals; data must also contain removals column and cell be set to True
38
37
  :param clear_opex_flag: set whether clear existing opexes
39
38
  :param export_flag: set whether to export the spreadsheet when using autoclass
40
39
  :param output_format: set output format when using autoclass {xlsx, csv}
@@ -60,9 +59,10 @@ class OpexManifestGenerator():
60
59
  acc_prefix: str = None,
61
60
  accession_mode: str = False,
62
61
  startref: int = 1,
63
- algorithm: str = None,
62
+ algorithm: list[str] = None,
63
+ pax_fixity: bool = False,
64
64
  empty_flag: bool = False,
65
- remove_flag: bool = False,
65
+ removal_flag: bool = False,
66
66
  clear_opex_flag: bool = False,
67
67
  export_flag: bool = False,
68
68
  input: str = None,
@@ -84,10 +84,11 @@ class OpexManifestGenerator():
84
84
  self.list_fixity = []
85
85
  self.start_time = datetime.datetime.now()
86
86
  self.algorithm = algorithm
87
+ self.pax_fixity_flag = pax_fixity
87
88
  self.empty_flag = empty_flag
88
- self.remove_flag = remove_flag
89
- if self.remove_flag:
90
- self.remove_list = []
89
+ self.removal_flag = removal_flag
90
+ if self.removal_flag:
91
+ self.removal_list = []
91
92
  self.export_flag = export_flag
92
93
  self.startref = startref
93
94
  self.autoclass_flag = autoclass_flag
@@ -153,23 +154,26 @@ class OpexManifestGenerator():
153
154
  root_element = ET.QName(xml_file.find('.'))
154
155
  root_element_ln = root_element.localname
155
156
  for elem in xml_file.findall(".//"):
156
- elem_path = xml_file.getelementpath(elem)
157
- elem = ET.QName(elem)
158
- elem_lnpath = elem_path.replace(f"{{{elem.namespace}}}", root_element_ln + ":")
159
- print(elem_lnpath)
157
+ if elem.getchildren():
158
+ pass
159
+ else:
160
+ elem_path = xml_file.getelementpath(elem)
161
+ elem = ET.QName(elem)
162
+ elem_lnpath = elem_path.replace(f"{{{elem.namespace}}}", root_element_ln + ":")
163
+ print(elem_lnpath)
160
164
 
161
165
  def set_input_flags(self):
162
- if 'Title' in self.column_headers:
166
+ if TITLE_FIELD in self.column_headers:
163
167
  self.title_flag = True
164
- if 'Description' in self.column_headers:
168
+ if DESCRIPTION_FIELD in self.column_headers:
165
169
  self.description_flag = True
166
- if 'Security' in self.column_headers:
170
+ if SECUIRTY_FIELD in self.column_headers:
167
171
  self.security_flag = True
168
- if 'SourceID' in self.column_headers:
172
+ if SOURCEID_FIELD in self.column_headers:
169
173
  self.sourceid_flag = True
170
- if 'Ignore' in self.column_headers:
174
+ if IGNORE_FIELD in self.column_headers:
171
175
  self.ignore_flag = True
172
- if 'Hash' in self.column_headers and 'Algorithm' in self.column_headers:
176
+ if HASH_FIELD in self.column_headers and ALGORITHM_FIELD in self.column_headers:
173
177
  self.hash_from_spread = True
174
178
  print("Hash detected in Spreadsheet; taking hashes from spreadsheet")
175
179
  time.sleep(3)
@@ -243,32 +247,20 @@ class OpexManifestGenerator():
243
247
  print('Error Looking up XIP Metadata')
244
248
  print(e)
245
249
 
246
- def remove_df_lookup(self, path: str, removed_list: list, idx: pd.Index):
250
+ def removal_df_lookup(self, idx: pd.Index):
247
251
  try:
248
252
  if idx.empty:
249
253
  return False
250
254
  else:
251
255
  remove = check_nan(self.df[REMOVAL_FIELD].loc[idx].item())
252
256
  if remove is not None:
253
- removed_list.append(path)
254
- print(f"Removing: {path}")
255
- if os.path.isdir(path):
256
- for dp,d,f in os.walk(path):
257
- for fn in f:
258
- removed_list.append(win_256_check(dp+win_path_delimiter()+fn))
259
- for dn in d:
260
- removed_list.append(win_256_check(dp+win_path_delimiter()+dn))
261
- shutil.rmtree(path)
262
- else:
263
- if os.path.exists(path):
264
- os.remove(path)
265
257
  return True
266
258
  else:
267
259
  return False
268
260
  except Exception as e:
269
261
  print('Error looking up Removals')
270
262
  print(e)
271
-
263
+
272
264
  def ignore_df_lookup(self, idx: pd.Index):
273
265
  try:
274
266
  if idx.empty:
@@ -298,11 +290,12 @@ class OpexManifestGenerator():
298
290
  if idx.empty:
299
291
  pass
300
292
  else:
301
- self.fixity = ET.SubElement(xml_fixities,f"{{{self.opexns}}}Fixity")
302
- self.hash = self.df[HASH_FIELD].loc[idx].item()
303
- self.algorithm = self.df[ALGORITHM_FIELD].loc[idx].item()
304
- self.fixity.set('type', self.algorithm)
305
- self.fixity.set('value',self.hash)
293
+ for algorithm_type in self.algorithm:
294
+ self.fixity = ET.SubElement(xml_fixities,f"{{{self.opexns}}}Fixity")
295
+ self.hash = self.df[HASH_FIELD].loc[idx].item()
296
+ self.algorithm = self.df[ALGORITHM_FIELD].loc[idx].item()
297
+ self.fixity.set('type', algorithm_type)
298
+ self.fixity.set('value',self.hash)
306
299
  except Exception as e:
307
300
  print('Error looking up Hash')
308
301
  print(e)
@@ -386,13 +379,15 @@ class OpexManifestGenerator():
386
379
  ns = elem_dict.get('Namespace')
387
380
  try:
388
381
  if self.metadata_flag in {'e', 'exact'}:
389
- val = check_nan(self.df[path].loc[idx].item())
382
+ val_series = self.df[path].loc[idx]
383
+ val = check_nan(val_series.item())
390
384
  elif self.metadata_flag in {'f', 'flat'}:
391
- val = check_nan(self.df[name].loc[idx].item())
385
+ val_series = self.df[name].loc[idx]
386
+ val = check_nan(val_series.item())
392
387
  if val is None:
393
388
  continue
394
389
  else:
395
- if is_datetime64_any_dtype(str(val)):
390
+ if is_datetime64_any_dtype(val_series):
396
391
  val = pd.to_datetime(val)
397
392
  val = datetime.datetime.strftime(val, "%Y-%m-%dT%H:%M:%S.000Z")
398
393
  if self.metadata_flag in {'e','exact'}:
@@ -435,13 +430,26 @@ class OpexManifestGenerator():
435
430
  if self.properties is None:
436
431
  xmlroot.remove(self.properties)
437
432
 
438
- def genererate_opex_fixity(self, file_path: str):
439
- self.fixity = ET.SubElement(self.fixities, f"{{{self.opexns}}}Fixity")
440
- self.hash = HashGenerator(algorithm = self.algorithm).hash_generator(file_path)
441
- self.fixity.set("type", self.algorithm)
442
- self.fixity.set("value", self.hash)
443
- self.OMG.list_fixity.append([self.algorithm, self.hash, file_path])
444
- self.OMG.list_path.append(file_path)
433
+ def generate_opex_fixity(self, file_path: str):
434
+ for algorithm_type in self.OMG.algorithm:
435
+ self.fixity = ET.SubElement(self.fixities, f"{{{self.opexns}}}Fixity")
436
+ self.hash = HashGenerator(algorithm = algorithm_type).hash_generator(file_path)
437
+ self.fixity.set("type", algorithm_type)
438
+ self.fixity.set("value", self.hash)
439
+ self.OMG.list_fixity.append([algorithm_type, self.hash, file_path])
440
+ self.OMG.list_path.append(file_path)
441
+
442
+ def generate_pax_zip_opex_fixity(self, file_path):
443
+ for algorithm_type in self.OMG.algorithm:
444
+ z = zipfile.ZipFile(file_path,'r')
445
+ for file in z.filelist:
446
+ self.fixity = ET.SubElement(self.fixities, f"{{{self.opexns}}}Fixity")
447
+ self.hash = HashGenerator(algorithm = algorithm_type).hash_generator_pax_zip(file.filename, z)
448
+ self.fixity.set("path", file.filename)
449
+ self.fixity.set("type", algorithm_type)
450
+ self.fixity.set("value", self.hash)
451
+ self.OMG.list_fixity.append([algorithm_type, self.hash, file_path + file.filename])
452
+ self.OMG.list_path.append(file_path)
445
453
 
446
454
  def main(self):
447
455
  if self.print_xmls_flag:
@@ -468,9 +476,9 @@ class OpexManifestGenerator():
468
476
  if self.algorithm:
469
477
  output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Fixities", output_format = "txt")
470
478
  export_list_txt(self.list_fixity, output_path)
471
- if self.remove_flag:
472
- output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Removed", output_format = "txt")
473
- export_list_txt(self.remove_list, output_path)
479
+ if self.removal_flag:
480
+ output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Removals", output_format = "txt")
481
+ export_list_txt(self.removal_list, output_path)
474
482
  print_running_time(self.start_time)
475
483
 
476
484
  class OpexDir(OpexManifestGenerator):
@@ -482,11 +490,10 @@ class OpexDir(OpexManifestGenerator):
482
490
  self.folder_path = folder_path.replace(u'\\\\?\\', "")
483
491
  else:
484
492
  self.folder_path = folder_path
485
- print(self.folder_path)
486
493
  if any([self.OMG.input,
487
494
  self.OMG.autoclass_flag in {"c","catalog","a","accession","b","both","cg","catalog-generic","ag","accession-generic","bg","both-generic"},
488
495
  self.OMG.ignore_flag,
489
- self.OMG.remove_flag,
496
+ self.OMG.removal_flag,
490
497
  self.OMG.sourceid_flag,
491
498
  self.OMG.title_flag,
492
499
  self.OMG.description_flag,
@@ -502,16 +509,16 @@ class OpexDir(OpexManifestGenerator):
502
509
  self.ignore = self.OMG.ignore_df_lookup(index)
503
510
  if self.ignore:
504
511
  return
505
- if self.OMG.remove_flag:
506
- self.removal = self.OMG.remove_df_lookup(self.folder_path, self.OMG.remove_list, index)
512
+ if self.OMG.removal_flag:
513
+ self.removal = self.OMG.removal_df_lookup(index)
507
514
  if self.removal:
515
+ remove_tree(self.folder_path, self.OMG.removal_list)
508
516
  return
509
517
  self.xmlroot = ET.Element(f"{{{self.opexns}}}OPEXMetadata", nsmap={"opex":self.opexns})
510
518
  self.transfer = ET.SubElement(self.xmlroot, f"{{{self.opexns}}}Transfer")
511
519
  self.manifest = ET.SubElement(self.transfer, f"{{{self.opexns}}}Manifest")
512
520
  self.folders = ET.SubElement(self.manifest, f"{{{self.opexns}}}Folders")
513
521
  self.files = ET.SubElement(self.manifest, f"{{{self.opexns}}}Files")
514
-
515
522
  if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
516
523
  self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
517
524
  elif self.OMG.autoclass_flag in {"generic", "g", "catalog-generic", "cg", "accession-generic", "ag", "both-generic", "bg"}:
@@ -524,6 +531,19 @@ class OpexDir(OpexManifestGenerator):
524
531
  self.security = security
525
532
  if self.OMG.sourceid_flag:
526
533
  self.OMG.sourceid_df_lookup(self.transfer, self.folder_path, index)
534
+ if self.OMG.algorithm and self.OMG.pax_fixity_flag is True and self.folder_path.endswith(".pax"):
535
+ self.fixities = ET.SubElement(self.transfer, f"{{{self.opexns}}}Fixities")
536
+ for dir,_,files in os.walk(folder_path):
537
+ for filename in files:
538
+ rel_path = os.path.relpath(dir,folder_path)
539
+ rel_file = os.path.join(rel_path, filename)
540
+ abs_file = os.path.abspath(os.path.join(dir,filename))
541
+ self.generate_opex_fixity(abs_file)
542
+ self.fixity.set("path",rel_file)
543
+ file = ET.SubElement(self.files, f"{{{self.opexns}}}File")
544
+ file.set("type", "content")
545
+ file.set("size", str(os.path.getsize(abs_file)))
546
+ file.text = str(rel_file)
527
547
  if self.OMG.autoclass_flag or self.OMG.input:
528
548
  self.OMG.generate_opex_properties(self.xmlroot, index,
529
549
  title = self.title,
@@ -554,27 +574,53 @@ class OpexDir(OpexManifestGenerator):
554
574
  raise SystemError()
555
575
 
556
576
  def generate_opex_dirs(self, path: str):
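577
+ """
578
+ This function loops recursively through a given directory.
579
+
580
+ There are two loops: the first generates Opexes for individual files and records sub-folders in the folder manifest; the second adds the previously generated Opexes and files to the folder manifest, which is then written.
581
+ """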
557
582
  self = OpexDir(self.OMG, path)
558
- opex_path = os.path.join(os.path.abspath(self.folder_path), os.path.basename(self.folder_path))
583
+ if self.OMG.algorithm and self.OMG.pax_fixity_flag is True and self.folder_path.endswith(".pax"):
584
+ opex_path = os.path.abspath(self.folder_path)
585
+ else:
586
+ opex_path = os.path.join(os.path.abspath(self.folder_path), os.path.basename(self.folder_path))
587
+ #First Loop to Generate Folder Manifest Opexes & Individual File Opexes.
559
588
  if self.removal is True:
589
+ #If removal is True for Folder, then it will be removed - Does not need to descend.
560
590
  pass
561
591
  else:
562
592
  for f_path in self.filter_directories(path):
563
593
  if f_path.endswith('.opex'):
594
+ #Ignores OPEX files / directories...
564
595
  pass
565
596
  elif os.path.isdir(f_path):
566
- if self.ignore is True:
597
+ if self.ignore is True or \
598
+ (self.OMG.removal_flag is True and \
599
+ self.OMG.removal_df_lookup(self.OMG.index_df_lookup(f_path)) is True):
600
+ #If Ignore is True, or the Folder below is marked for Removal: Don't add to Opex
567
601
  pass
568
602
  else:
603
+ #Add Folder to OPEX Manifest (doesn't get written yet...)
569
604
  self.folder = ET.SubElement(self.folders, f"{{{self.opexns}}}Folder")
570
605
  self.folder.text = str(os.path.basename(f_path))
571
- self.generate_opex_dirs(f_path)
606
+ if self.OMG.algorithm and self.OMG.pax_fixity_flag is True and self.folder_path.endswith(".pax"):
607
+ #If using fixity, but the folder is a PAX & using PAX Fixity: End descent.
608
+ pass
609
+ else:
610
+ #Recurse Descent.
611
+ self.generate_opex_dirs(f_path)
612
+ elif os.path.isfile(f_path):
613
+ #Processes OPEXes for individual Files: this gets written.
614
+ OpexFile(self.OMG, f_path)
572
615
  else:
573
- OpexFile(self.OMG, f_path, self.OMG.algorithm)
616
+ print('Unknown File Type?')
617
+ pass
618
+ #Second Loop to add previously generated Opexes to Folder Manifest.
574
619
  if self.removal is True or self.ignore is True:
575
620
  pass
576
621
  else:
577
622
  if check_opex(opex_path):
623
+ #Only processing Opexes.
578
624
  for f_path in self.filter_directories(path):
579
625
  if os.path.isfile(f_path):
580
626
  file = ET.SubElement(self.files, f"{{{self.opexns}}}File")
@@ -584,12 +630,14 @@ class OpexDir(OpexManifestGenerator):
584
630
  file.set("type", "content")
585
631
  file.set("size", str(os.path.getsize(f_path)))
586
632
  file.text = str(os.path.basename(f_path))
633
+ #Writes Folder OPEX
587
634
  write_opex(opex_path, self.xmlroot)
588
635
  else:
636
+ #Avoids Override if exists, lets you continue where left off.
589
637
  print(f"Avoiding override, Opex exists at: {opex_path}")
590
638
 
591
639
  class OpexFile(OpexManifestGenerator):
592
- def __init__(self, OMG: OpexManifestGenerator, file_path: str, algorithm: str = None, title: str = None, description: str = None, security: str = None):
640
+ def __init__(self, OMG: OpexManifestGenerator, file_path: str, title: str = None, description: str = None, security: str = None):
593
641
  self.OMG = OMG
594
642
  self.opexns = self.OMG.opexns
595
643
  if file_path.startswith(u'\\\\?\\'):
@@ -600,7 +648,7 @@ class OpexFile(OpexManifestGenerator):
600
648
  if any([self.OMG.input,
601
649
  self.OMG.autoclass_flag in {"c","catalog","a","accession","b","both","cg","catalog-generic","ag","accession-generic","bg","both-generic"},
602
650
  self.OMG.ignore_flag,
603
- self.OMG.remove_flag,
651
+ self.OMG.removal_flag,
604
652
  self.OMG.sourceid_flag,
605
653
  self.OMG.title_flag,
606
654
  self.OMG.description_flag,
@@ -616,11 +664,10 @@ class OpexFile(OpexManifestGenerator):
616
664
  self.ignore = self.OMG.ignore_df_lookup(index)
617
665
  if self.ignore:
618
666
  return
619
- if self.OMG.remove_flag:
620
- self.removal = self.OMG.remove_df_lookup(self.file_path, self.OMG.remove_list, index)
667
+ if self.OMG.removal_flag:
668
+ self.removal = self.OMG.removal_df_lookup(index)
621
669
  if self.removal:
622
670
  return
623
- self.algorithm = algorithm
624
671
  if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
625
672
  self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
626
673
  elif self.OMG.autoclass_flag in {"generic", "g", "catalog-generic", "cg", "accession-generic", "ag", "both-generic", "bg"}:
@@ -641,7 +688,10 @@ class OpexFile(OpexManifestGenerator):
641
688
  if self.OMG.hash_from_spread:
642
689
  self.OMG.hash_df_lookup(self.fixities, index)
643
690
  else:
644
- self.genererate_opex_fixity(self.file_path)
691
+ if self.OMG.pax_fixity_flag is True and (self.file_path.endswith("pax.zip") or self.file_path.endswith(".pax")):
692
+ self.generate_pax_zip_opex_fixity(self.file_path)
693
+ else:
694
+ self.generate_opex_fixity(self.file_path)
645
695
  if self.transfer is None:
646
696
  self.xmlroot.remove(self.transfer)
647
697
  if self.OMG.autoclass_flag or self.OMG.input: