opex-manifest-generator 1.1.10__py3-none-any.whl → 1.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opex_manifest_generator/cli.py +63 -24
- opex_manifest_generator/common.py +38 -2
- opex_manifest_generator/opex_manifest.py +229 -259
- opex_manifest_generator/options.properties +13 -0
- {opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/METADATA +1 -1
- {opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/RECORD +10 -9
- {opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/WHEEL +1 -1
- {opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/LICENSE.md +0 -0
- {opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/entry_points.txt +0 -0
- {opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/top_level.txt +0 -0
opex_manifest_generator/cli.py
CHANGED
|
@@ -13,24 +13,66 @@ import importlib.metadata
|
|
|
13
13
|
|
|
14
14
|
def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace: the parsed options (``root``, ``autoclass``,
        ``prefix``, ``fixity``, ``remove_empty``, ``output``,
        ``disable_meta_dir``, ``clear_opex``, ``options_file``, ``start_ref``,
        ``metadata_dir``, ``metadata``, ``export``, ``input``, ``remove``,
        ``zip``, ``output_format``, ``accession_mode``, ``hidden``,
        ``print_xmls``).
    """
    parser = argparse.ArgumentParser(description="OPEX Manifest Generator for Preservica Uploads")
    # NOTE: argparse treats a positional without ``nargs`` as required, so this
    # default is never applied. It is kept as-is rather than made optional, so
    # that a missing root is still an error instead of silently targeting the
    # current working directory.
    parser.add_argument('root', default=os.getcwd(), help="The root path to generate Opexes for")
    parser.add_argument("-c", "--autoclass", required=False,
                        choices=['catalog', 'c', 'accession', 'a', 'both', 'b', 'generic', 'g',
                                 'catalog-generic', 'cg', "accession-generic", "ag", "both-generic", "bg"],
                        type=str.lower,
                        help="""Toggles whether to utilise the auto_classification_generator
                        to generate an on the fly Reference listing.

                        There are several options, {catalog} will generate
                        a Archival Reference following an ISAD(G) structure.
                        {accession} will create a running number of files.
                        {both} will do both at the same time!
                        {generic} will populate the title and description fields with the folder/file's name,
                        if used in conjunction with one of the above options:
                        {catalog-generic, accession-generic, both-generic} it will do both simultaneously.
                        """)
    parser.add_argument("-p", "--prefix", required=False, nargs='+',
                        help="""Assign a prefix when utilising the --autoclass option. Prefix will append any text before all generated text.
                        When utilising the {both} option fill in like: [catalog-prefix, accession-prefix] without square brackets.
                        """)
    parser.add_argument("-fx", "--fixity", required=False, const="SHA-1", default=None,
                        nargs='?', choices=['NONE', 'SHA-1', 'MD5', 'SHA-256', 'SHA-512'], type=str.upper,
                        help="Generates a hash for each file and adds it to the opex, can select the algorithm to utilise.")
    parser.add_argument("-rme", "--remove-empty", required=False, action='store_true', default=False,
                        help="Remove and log empty directories from root. Log will be exported to 'meta' / output folder.")
    parser.add_argument("-o", "--output", required=False, nargs=1,
                        help="Sets the output to send any generated files to. Will not affect creation of a meta dir.")
    # store_false: the attribute is True unless the flag is passed.
    parser.add_argument("-dmd", "--disable-meta-dir", required=False, action='store_false',
                        help="""Set whether to disable the creation of a 'meta' directory for generated files,
                        default behaviour is to always generate this directory""")
    parser.add_argument("-clr", "--clear-opex", required=False, action='store_true', default=False,
                        help="""Clears existing opex files from a directory. If set with no further options will only clear opexes;
                        if multiple options are set will clear opexes and then run the program""")
    parser.add_argument("-opt", "--options-file", required=False,
                        default=os.path.join(os.path.dirname(__file__), 'options.properties'),
                        help="Specify a custom Options file, changing the set presets for column headers (Title,Description,etc)")
    parser.add_argument("-s", "--start-ref", required=False, nargs='?', default=1,
                        help="Set a custom Starting reference for the Auto Classification generator.")
    parser.add_argument("-mdir", "--metadata-dir", required=False, nargs='?',
                        default=os.path.join(os.path.dirname(os.path.realpath(__file__)), "metadata"),
                        help="Specify the metadata directory to pull XML files from")
    parser.add_argument("-m", "--metadata", required=False, const='e', default='none',
                        nargs='?', choices=['none', 'n', 'exact', 'e', 'flat', 'f'], type=str.lower,
                        help="Set whether to include xml metadata fields in the generation of the Opex")
    parser.add_argument("-ex", "--export", required=False, action='store_true', default=False,
                        help="Set whether to export the generated auto classification references to an AutoClass spreadsheet")
    parser.add_argument("-i", "--input", required=False, nargs='?',
                        help="Set to utilise a CSV / XLSX spreadsheet to import data from")
    parser.add_argument("-rm", "--remove", required=False, action="store_true", default=False,
                        help="Set whether to enable removals of files and folders from a directory. ***Currently in testing")
    parser.add_argument("-z", "--zip", required=False, action='store_true',
                        help="Set to zip files")
    parser.add_argument("-fmt", "--output-format", required=False, default="xlsx", choices=['xlsx', 'csv'],
                        help="Set whether to output to an xlsx or csv format")
    # The distribution metadata is absent when the module is run straight from
    # a source checkout; fall back instead of crashing before parsing.
    try:
        package_version = importlib.metadata.version("opex_manifest_generator")
    except importlib.metadata.PackageNotFoundError:
        package_version = "unknown"
    parser.add_argument("-v", "--version", action='version',
                        version='%(prog)s {version}'.format(version=package_version))
    parser.add_argument("--accession-mode", required=False, choices=["file", 'directory', 'both'],
                        help="""Set the mode when utilising the Accession option in autoclass.
                        file - only adds on files, directory - only adds on folders, both - adds on files and folders""")
    parser.add_argument("--hidden", required=False, action='store_true', default=False,
                        help="Set whether to include hidden files and folders")
    parser.add_argument("--print-xmls", required=False, action="store_true", default=False,
                        help="Prints the elements from your xmls to the consoles")
    args = parser.parse_args()
    return args
|
|
36
78
|
|
|
@@ -51,11 +93,6 @@ def run_cli():
|
|
|
51
93
|
time.sleep(5)
|
|
52
94
|
if args.print_xmls:
|
|
53
95
|
OpexManifestGenerator.print_descriptive_xmls()
|
|
54
|
-
if args.autoclass:
|
|
55
|
-
pass
|
|
56
|
-
# if not args.prefix:
|
|
57
|
-
# print('A prefix must be set when using Auto-Classification, stopping operation')
|
|
58
|
-
# time.sleep(3); raise SystemExit()
|
|
59
96
|
acc_prefix = None
|
|
60
97
|
if args.prefix:
|
|
61
98
|
if args.autoclass in {"both", "b", "both-generic", "bg"}:
|
|
@@ -113,9 +150,11 @@ def run_cli():
|
|
|
113
150
|
export_flag = args.export,
|
|
114
151
|
meta_dir_flag = args.disable_meta_dir,
|
|
115
152
|
metadata_flag = args.metadata,
|
|
153
|
+
metadata_dir = args.metadata_dir,
|
|
116
154
|
hidden_flag= args.hidden,
|
|
117
155
|
zip_flag = args.zip,
|
|
118
156
|
input = args.input,
|
|
119
157
|
output_format = args.output_format).main()
|
|
120
|
-
|
|
121
|
-
|
|
158
|
+
|
|
159
|
+
if __name__ == "__main__":
|
|
160
|
+
run_cli()
|
|
@@ -5,7 +5,9 @@ author: Christopher Prince
|
|
|
5
5
|
license: Apache License 2.0"
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import zipfile, os, sys
|
|
8
|
+
import zipfile, os, sys, time, stat
|
|
9
|
+
import datetime
|
|
10
|
+
from lxml import etree
|
|
9
11
|
|
|
10
12
|
def zip_opex(file_path,opex_path):
|
|
11
13
|
zip_file = f"{file_path}.zip"
|
|
@@ -19,4 +21,38 @@ def win_256_check(path: str):
|
|
|
19
21
|
if len(path) > 255 and sys.platform == "win32":
|
|
20
22
|
if path.startswith(u'\\\\?\\'): path = path
|
|
21
23
|
else: path = u"\\\\?\\" + path
|
|
22
|
-
return path
|
|
24
|
+
return path
|
|
25
|
+
|
|
26
|
+
def filter_win_hidden(path: str):
    """Return True when *path* carries the Windows "hidden" file attribute.

    On any platform other than ``win32`` this always returns False without
    touching the filesystem.

    Args:
        path: filesystem path to inspect (only stat'ed on Windows).

    Returns:
        bool: True if running on Windows and the hidden attribute bit is set.
    """
    if sys.platform != "win32":
        return False
    # The stat result field is ``st_file_attributes`` (plural); the singular
    # spelling does not exist and raised AttributeError on every Windows call.
    attributes = os.stat(path).st_file_attributes
    return bool(attributes & stat.FILE_ATTRIBUTE_HIDDEN)
|
|
34
|
+
|
|
35
|
+
def check_nan(value):
    """Map "missing value" markers to ``None`` and pass everything else through.

    The test is made on the lower-cased string form of *value*, so it matches
    float NaN (``"nan"``), NaT (``"nat"``) and pandas' ``<NA>`` scalar
    (``"<na>"``), as well as strings spelling those markers.

    Args:
        value: any cell value.

    Returns:
        ``None`` if *value* is a missing marker, otherwise *value* unchanged.
    """
    # "<na>" covers pandas.NA, whose truth value is ambiguous and would raise
    # in the callers' ``if value:`` checks if it were passed through.
    if str(value).lower() in {"nan", "nat", "<na>"}:
        return None
    return value
|
|
39
|
+
|
|
40
|
+
def check_opex(opex_path:str):
    """Tell whether the ``.opex`` companion of *opex_path* is absent.

    Returns False when ``opex_path + ".opex"`` (passed through
    ``win_256_check``) already exists on disk, True otherwise.
    """
    candidate = win_256_check(opex_path + ".opex")
    return not os.path.exists(candidate)
|
|
46
|
+
|
|
47
|
+
def write_opex(path: str, opexxml: etree.Element):
    """Serialise *opexxml* to ``<path>.opex`` and return the path written.

    The element is indented in place, rendered as UTF-8 XML with a standalone
    declaration, written as text, and the destination is printed.
    """
    destination = win_256_check(str(path) + ".opex")
    # indent() reformats the tree in place; its return value is not needed.
    etree.indent(opexxml, " ")
    serialised = etree.tostring(opexxml, pretty_print=True, xml_declaration=True,
                                encoding="UTF-8", standalone=True)
    with open(f'{destination}', 'w', encoding="UTF-8") as handle:
        handle.write(serialised.decode('UTF-8'))
    print('Saved Opex File to: ' + destination)
    return destination
|
|
55
|
+
|
|
56
|
+
def print_running_time(start_time):
    """Print the time elapsed since *start_time*, then pause for five seconds."""
    elapsed = datetime.datetime.now() - start_time
    print(f'Running time: {elapsed}')
    time.sleep(5)
|
|
@@ -8,24 +8,22 @@ license: Apache License 2.0"
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import lxml.etree as ET
|
|
11
|
-
import os
|
|
11
|
+
import os, time, shutil
|
|
12
12
|
from auto_classification_generator import ClassificationGenerator
|
|
13
13
|
from auto_classification_generator.common import export_list_txt, export_xl, export_csv, define_output_file
|
|
14
|
-
|
|
15
|
-
import time
|
|
14
|
+
import datetime
|
|
16
15
|
import pandas as pd
|
|
17
16
|
from pandas.api.types import is_datetime64_any_dtype
|
|
18
17
|
from opex_manifest_generator.hash import HashGenerator
|
|
19
18
|
from opex_manifest_generator.common import *
|
|
20
|
-
import
|
|
21
|
-
import shutil
|
|
19
|
+
import configparser
|
|
22
20
|
|
|
23
21
|
class OpexManifestGenerator():
|
|
24
22
|
def __init__(self,
|
|
25
23
|
root: str,
|
|
26
24
|
output_path: os.path = os.getcwd(),
|
|
27
25
|
meta_dir_flag: bool = True,
|
|
28
|
-
metadata_dir:
|
|
26
|
+
metadata_dir: os = os.path.join(os.path.dirname(os.path.realpath(__file__)), "metadata"),
|
|
29
27
|
metadata_flag: str = 'none',
|
|
30
28
|
autoclass_flag: str = None,
|
|
31
29
|
prefix: str = None,
|
|
@@ -40,13 +38,14 @@ class OpexManifestGenerator():
|
|
|
40
38
|
zip_flag: bool = False,
|
|
41
39
|
hidden_flag: bool = False,
|
|
42
40
|
output_format: str = "xlsx",
|
|
43
|
-
print_xmls_flag: bool = False
|
|
41
|
+
print_xmls_flag: bool = False,
|
|
42
|
+
options_file: str = os.path.join(os.path.dirname(__file__),'options.properties')):
|
|
44
43
|
|
|
45
44
|
self.root = os.path.abspath(root)
|
|
46
45
|
self.opexns = "http://www.openpreservationexchange.org/opex/v1.2"
|
|
47
46
|
self.list_path = []
|
|
48
47
|
self.list_fixity = []
|
|
49
|
-
self.start_time = datetime.now()
|
|
48
|
+
self.start_time = datetime.datetime.now()
|
|
50
49
|
self.algorithm = algorithm
|
|
51
50
|
self.empty_flag = empty_flag
|
|
52
51
|
self.remove_flag = remove_flag
|
|
@@ -59,52 +58,131 @@ class OpexManifestGenerator():
|
|
|
59
58
|
self.prefix = prefix
|
|
60
59
|
self.acc_prefix = acc_prefix
|
|
61
60
|
self.input = input
|
|
62
|
-
self.title_flag = False
|
|
63
|
-
self.description_flag = False
|
|
64
|
-
self.security_flag = False
|
|
65
|
-
self.ignore_flag = False
|
|
66
|
-
self.sourceid_flag = False
|
|
67
|
-
self.hash_from_spread = False
|
|
68
61
|
self.hidden_flag = hidden_flag
|
|
69
62
|
self.zip_flag = zip_flag
|
|
70
63
|
self.output_format = output_format
|
|
71
64
|
self.metadata_flag = metadata_flag
|
|
72
65
|
self.metadata_dir = metadata_dir
|
|
73
66
|
self.print_xmls_flag = print_xmls_flag
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
67
|
+
self.parse_config(options_file=os.path.abspath(options_file))
|
|
68
|
+
|
|
69
|
+
self.title_flag = False
|
|
70
|
+
self.description_flag = False
|
|
71
|
+
self.security_flag = False
|
|
72
|
+
self.ignore_flag = False
|
|
73
|
+
self.sourceid_flag = False
|
|
74
|
+
self.hash_from_spread = False
|
|
75
|
+
|
|
76
|
+
def parse_config(self, options_file: str = 'options.properties'):
    """Load the spreadsheet column names from *options_file* into module globals.

    Reads the ``[options]`` section and publishes each entry as a
    module-level name (``INDEX_FIELD``, ``TITLE_FIELD``, ...), which the
    dataframe lookup methods reference.

    Args:
        options_file: path to an INI-style options file.

    Raises:
        FileNotFoundError: if *options_file* could not be read.
        KeyError: if the ``[options]`` section or one of its keys is missing.
    """
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did read; fail loudly instead of with a bare
    # KeyError('options') further down.
    if not config.read(options_file, encoding='utf-8'):
        raise FileNotFoundError(f"Options file could not be read: {options_file}")
    options = config['options']
    global INDEX_FIELD, TITLE_FIELD, DESCRIPTION_FIELD, SECUIRTY_FIELD
    global IDENTIFIER_FIELD, IDENTIFIER_DEFAULT, REMOVAL_FIELD, IGNORE_FIELD
    global SOURCEID_FIELD, HASH_FIELD, ALGORITHM_FIELD
    INDEX_FIELD = options['INDEX_FIELD']
    TITLE_FIELD = options['TITLE_FIELD']
    DESCRIPTION_FIELD = options['DESCRIPTION_FIELD']
    # The misspelling is the actual key name used in the options file.
    SECUIRTY_FIELD = options['SECUIRTY_FIELD']
    IDENTIFIER_FIELD = options['IDENTIFIER_FIELD']
    IDENTIFIER_DEFAULT = options['IDENTIFIER_DEFAULT']
    REMOVAL_FIELD = options['REMOVAL_FIELD']
    IGNORE_FIELD = options['IGNORE_FIELD']
    SOURCEID_FIELD = options['SOURCEID_FIELD']
    HASH_FIELD = options['HASH_FIELD']
    ALGORITHM_FIELD = options['ALGORITHM_FIELD']
|
|
101
|
+
|
|
102
|
+
def print_descriptive_xmls(self):
    """Print each file in ``self.metadata_dir`` followed by its element paths.

    For every directory entry the file path is printed, the file is parsed as
    XML, and the path of each descendant element is printed with that
    element's ``{namespace}`` prefix replaced by ``<root localname>:``.
    """
    for entry in os.scandir(self.metadata_dir):
        xml_path = os.path.join(self.metadata_dir, entry.name)
        print(xml_path)
        tree = ET.parse(xml_path)
        root_localname = ET.QName(tree.find('.')).localname
        for node in tree.findall(".//"):
            node_path = tree.getelementpath(node)
            namespace = ET.QName(node).namespace
            print(node_path.replace(f"{{{namespace}}}", root_localname + ":"))
|
|
114
|
+
|
|
115
|
+
def set_input_flags(self):
    """Switch on the per-column flags for headers present in ``self.column_headers``.

    The header names are the configured ``*_FIELD`` module globals set by
    ``parse_config``, so the flags agree with the columns the lookup methods
    read. If a global has not been set, the original literal header
    (``'Title'``, ``'Description'``, ...) is used. Flags are only ever set to
    True here, never reset.
    """
    configured = globals().get
    headers = self.column_headers
    if configured('TITLE_FIELD', 'Title') in headers:
        self.title_flag = True
    if configured('DESCRIPTION_FIELD', 'Description') in headers:
        self.description_flag = True
    if configured('SECUIRTY_FIELD', 'Security') in headers:
        self.security_flag = True
    if configured('SOURCEID_FIELD', 'SourceID') in headers:
        self.sourceid_flag = True
    if configured('IGNORE_FIELD', 'Ignore') in headers:
        self.ignore_flag = True
    # Hashes are taken from the spreadsheet only when both columns exist.
    if (configured('HASH_FIELD', 'Hash') in headers
            and configured('ALGORITHM_FIELD', 'Algorithm') in headers):
        self.hash_from_spread = True
        print("Hash detected in Spreadsheet; taking hashes from spreadsheet")
        time.sleep(3)
|
|
130
|
+
|
|
131
|
+
def init_df(self):
|
|
132
|
+
if self.autoclass_flag:
|
|
133
|
+
if self.autoclass_flag in {"catalog", "c", "catalog-generic", "cg"}:
|
|
134
|
+
ac = ClassificationGenerator(self.root, output_path = self.output_path, prefix = self.prefix, start_ref = self.startref, empty_flag = self.empty_flag, accession_flag = False)
|
|
135
|
+
elif self.autoclass_flag in {"accession", "a", "accession-generic", "ag", "both", "b", "both-generic", "bg"}:
|
|
136
|
+
ac = ClassificationGenerator(self.root, output_path = self.output_path, prefix = self.prefix, accprefix = self.acc_prefix, start_ref = self.startref, empty_flag = self.empty_flag, accession_flag="File")
|
|
137
|
+
self.df = ac.init_dataframe()
|
|
138
|
+
if self.autoclass_flag in {"accession", "a", "accesion-generic", "ag"}:
|
|
139
|
+
self.df = self.df.drop('Archive_Reference', axis=1)
|
|
140
|
+
self.column_headers = self.df.columns.values.tolist()
|
|
141
|
+
self.set_input_flags()
|
|
142
|
+
if self.export_flag:
|
|
143
|
+
output_path = define_output_file(self.output_path, self.root, meta_dir_flag = self.meta_dir_flag, output_format = self.output_format)
|
|
144
|
+
if self.output_format == "xlsx":
|
|
145
|
+
export_xl(self.df, output_path)
|
|
146
|
+
elif self.output_format == "csv":
|
|
147
|
+
export_csv(self.df, output_path)
|
|
148
|
+
elif self.input:
|
|
149
|
+
if self.input.endswith('xlsx'):
|
|
150
|
+
self.df = pd.read_excel(self.input)
|
|
151
|
+
elif self.input.endswith('csv'):
|
|
152
|
+
self.df = pd.read_csv(self.input)
|
|
153
|
+
self.column_headers = self.df.columns.values.tolist()
|
|
154
|
+
self.set_input_flags()
|
|
155
|
+
else:
|
|
156
|
+
self.df = None
|
|
157
|
+
self.column_headers = None
|
|
158
|
+
|
|
159
|
+
def clear_opex(self):
    """Delete every ``*.opex`` file under ``self.root``, printing each one removed.

    The directory walk is collected first and then processed in reverse order.
    """
    for folder, _, filenames in reversed(list(os.walk(self.root))):
        for filename in filenames:
            candidate = win_256_check(os.path.join(folder, filename))
            if not str(candidate).endswith('.opex'):
                continue
            os.remove(candidate)
            print(f'Cleared Opex: {candidate}')
|
|
78
167
|
|
|
79
168
|
def index_df_lookup(self, path: str):
    """Return the index labels of the rows whose ``INDEX_FIELD`` column equals *path*."""
    index_column = self.df[INDEX_FIELD]
    matches = index_column.index[index_column == path]
    return matches
|
|
82
171
|
|
|
83
|
-
def
|
|
172
|
+
def xip_df_lookup(self, idx: pd.Index):
|
|
84
173
|
try:
|
|
174
|
+
title = None
|
|
175
|
+
description = None
|
|
176
|
+
security = None
|
|
85
177
|
if idx.empty:
|
|
86
|
-
|
|
87
|
-
description = None
|
|
88
|
-
security = None
|
|
178
|
+
pass
|
|
89
179
|
else:
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
if self.description_flag:
|
|
97
|
-
description = self.df['Description'].loc[idx].item()
|
|
98
|
-
if str(description).lower() in {"nan","nat"}:
|
|
99
|
-
description = None
|
|
100
|
-
else:
|
|
101
|
-
description = None
|
|
102
|
-
if self.security_flag:
|
|
103
|
-
security = self.df['Security'].loc[idx].item()
|
|
104
|
-
if str(security).lower() in {"nan","nat"}:
|
|
105
|
-
security = None
|
|
106
|
-
else:
|
|
107
|
-
security = None
|
|
180
|
+
if self.title_flag:
|
|
181
|
+
title = check_nan(self.df[TITLE_FIELD].loc[idx].item())
|
|
182
|
+
if self.description_flag:
|
|
183
|
+
description = check_nan(self.df[DESCRIPTION_FIELD].loc[idx].item())
|
|
184
|
+
if self.security_flag:
|
|
185
|
+
security = check_nan(self.df[SECUIRTY_FIELD].loc[idx].item())
|
|
108
186
|
return title,description,security
|
|
109
187
|
except Exception as e:
|
|
110
188
|
print('Error Looking up XIP Metadata')
|
|
@@ -115,10 +193,8 @@ class OpexManifestGenerator():
|
|
|
115
193
|
if idx.empty:
|
|
116
194
|
return False
|
|
117
195
|
else:
|
|
118
|
-
remove = self.df[
|
|
119
|
-
if
|
|
120
|
-
return False
|
|
121
|
-
elif bool(remove):
|
|
196
|
+
remove = check_nan(self.df[REMOVAL_FIELD].loc[idx].item())
|
|
197
|
+
if remove:
|
|
122
198
|
print(f"Removing: {path}")
|
|
123
199
|
# Not functioning correctly
|
|
124
200
|
if os.path.isdir(path):
|
|
@@ -137,15 +213,10 @@ class OpexManifestGenerator():
|
|
|
137
213
|
if idx.empty:
|
|
138
214
|
return False
|
|
139
215
|
else:
|
|
140
|
-
ignore = self.df[
|
|
141
|
-
|
|
142
|
-
return False
|
|
143
|
-
elif str(ignore).lower() in {"true", "1.0"}:
|
|
144
|
-
return True
|
|
145
|
-
elif str(ignore).lower() in {"false", "0.0"}:
|
|
146
|
-
return False
|
|
216
|
+
ignore = check_nan(self.df[IGNORE_FIELD].loc[idx].item())
|
|
217
|
+
return bool(ignore)
|
|
147
218
|
except Exception as e:
|
|
148
|
-
print('Error looking up
|
|
219
|
+
print('Error looking up Ignore')
|
|
149
220
|
print(e)
|
|
150
221
|
|
|
151
222
|
def sourceid_df_lookup(self, xml_element: ET.SubElement, idx: pd.Index):
|
|
@@ -153,149 +224,54 @@ class OpexManifestGenerator():
|
|
|
153
224
|
if idx.empty:
|
|
154
225
|
pass
|
|
155
226
|
else:
|
|
156
|
-
sourceid = self.df[
|
|
157
|
-
if
|
|
158
|
-
pass
|
|
159
|
-
else:
|
|
227
|
+
sourceid = check_nan(self.df[SOURCEID_FIELD].loc[idx].item())
|
|
228
|
+
if sourceid:
|
|
160
229
|
source_xml = ET.SubElement(xml_element,f"{{{self.opexns}}}SourceID")
|
|
161
230
|
source_xml.text = str(sourceid)
|
|
162
231
|
except Exception as e:
|
|
163
232
|
print('Error looking up SourceID')
|
|
164
233
|
print(e)
|
|
165
234
|
|
|
166
|
-
def hash_df_lookup(self, xml_fixities: ET.SubElement, idx: pd.Index):
    """Append a ``Fixity`` element built from the spreadsheet row at *idx*.

    Reads the ``HASH_FIELD`` and ``ALGORITHM_FIELD`` cells. When both hold a
    value, stores them on ``self.hash`` / ``self.algorithm`` and adds a
    ``Fixity`` child with ``type`` and ``value`` attributes to *xml_fixities*.
    Does nothing when *idx* is empty or either cell is blank. Any error is
    printed rather than raised.

    Args:
        xml_fixities: parent element that receives the ``Fixity`` child.
        idx: index of the spreadsheet row for the current file.
    """
    try:
        if idx.empty:
            return
        hash_value = check_nan(self.df[HASH_FIELD].loc[idx].item())
        algorithm = check_nan(self.df[ALGORITHM_FIELD].loc[idx].item())
        # Validate before touching the tree or instance state, so a blank cell
        # neither leaves an attribute-less <Fixity/> behind nor replaces
        # self.algorithm with NaN.
        if hash_value is None or algorithm is None:
            return
        # Attribute values must be strings; cells may have been read as numbers.
        self.hash = str(hash_value)
        self.algorithm = str(algorithm)
        self.fixity = ET.SubElement(xml_fixities, f"{{{self.opexns}}}Fixity")
        self.fixity.set('type', self.algorithm)
        self.fixity.set('value', self.hash)
    except Exception as e:
        print('Error looking up Hash')
        print(e)
|
|
180
248
|
|
|
181
|
-
def ident_df_lookup(self, idx: pd.Index,
|
|
249
|
+
def ident_df_lookup(self, idx: pd.Index, default_key: str = None):
|
|
182
250
|
try:
|
|
183
251
|
if idx.empty:
|
|
184
|
-
|
|
185
|
-
self.identifier = ET.SubElement(self.identifiers,f"{{{self.opexns}}}Identifier")
|
|
186
|
-
if key_override is None:
|
|
187
|
-
key_name = "code"
|
|
188
|
-
else:
|
|
189
|
-
key_name = key_override
|
|
190
|
-
self.identifier.set("type",key_name)
|
|
191
|
-
self.identifier.text = ident
|
|
252
|
+
pass
|
|
192
253
|
else:
|
|
193
254
|
for header in self.column_headers:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
if '
|
|
197
|
-
key_name = str(header).
|
|
198
|
-
elif
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
key_name = "code"
|
|
255
|
+
ident = None
|
|
256
|
+
if any(s in header for s in {IDENTIFIER_FIELD,'Archive_Reference','Accession_Reference'}):
|
|
257
|
+
if f'{IDENTIFIER_FIELD}:' in header:
|
|
258
|
+
key_name = str(header).split(':',1)[-1]
|
|
259
|
+
elif IDENTIFIER_FIELD in header:
|
|
260
|
+
key_name = IDENTIFIER_DEFAULT
|
|
261
|
+
elif 'Archive_Reference' in header:
|
|
262
|
+
key_name = IDENTIFIER_DEFAULT
|
|
263
|
+
elif 'Accession_Reference' in header:
|
|
264
|
+
key_name = "accref"
|
|
205
265
|
else:
|
|
206
|
-
key_name =
|
|
207
|
-
|
|
208
|
-
ident
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
self.identifier = ET.SubElement(self.identifiers, f"{{{self.opexns}}}Identifier")
|
|
213
|
-
self.identifier.set("type", key_name)
|
|
214
|
-
self.identifier.text = str(ident)
|
|
266
|
+
key_name = IDENTIFIER_DEFAULT
|
|
267
|
+
ident = check_nan(self.df[header].loc[idx].item())
|
|
268
|
+
if ident:
|
|
269
|
+
self.identifier = ET.SubElement(self.identifiers, f"{{{self.opexns}}}Identifier")
|
|
270
|
+
self.identifier.set("type", key_name)
|
|
271
|
+
self.identifier.text = str(ident)
|
|
215
272
|
except Exception as e:
|
|
216
273
|
print('Error looking up Identifiers')
|
|
217
274
|
print(e)
|
|
218
|
-
|
|
219
|
-
def check_opex(self, opex_path: str):
|
|
220
|
-
opex_path = opex_path + ".opex"
|
|
221
|
-
if os.path.exists(win_256_check(opex_path)):
|
|
222
|
-
return False
|
|
223
|
-
else:
|
|
224
|
-
return True
|
|
225
|
-
|
|
226
|
-
def write_opex(self, path: str, opexxml: ET.Element):
|
|
227
|
-
opex_path = win_256_check(str(path) + ".opex")
|
|
228
|
-
opex = ET.indent(opexxml, " ")
|
|
229
|
-
opex = ET.tostring(opexxml, pretty_print=True, xml_declaration=True, encoding="UTF-8", standalone=True)
|
|
230
|
-
with open(f'{opex_path}', 'w', encoding="UTF-8") as writer:
|
|
231
|
-
writer.write(opex.decode('UTF-8'))
|
|
232
|
-
print('Saved Opex File to: ' + opex_path)
|
|
233
|
-
return opex_path
|
|
234
|
-
|
|
235
|
-
def init_df(self):
|
|
236
|
-
if self.autoclass_flag:
|
|
237
|
-
if self.autoclass_flag in {"catalog", "c", "catalog-generic", "cg"}:
|
|
238
|
-
ac = ClassificationGenerator(self.root, output_path = self.output_path, prefix = self.prefix, start_ref = self.startref, empty_flag = self.empty_flag, accession_flag = False)
|
|
239
|
-
elif self.autoclass_flag in {"accession", "a", "accession-generic", "ag", "both", "b", "both-generic", "bg"}:
|
|
240
|
-
ac = ClassificationGenerator(self.root, output_path = self.output_path, prefix = self.prefix, accprefix = self.acc_prefix, start_ref = self.startref, empty_flag = self.empty_flag, accession_flag="File")
|
|
241
|
-
self.df = ac.init_dataframe()
|
|
242
|
-
if self.autoclass_flag in {"accession", "a", "accesion-generic", "ag"}:
|
|
243
|
-
self.df = self.df.drop('Archive_Reference', axis=1)
|
|
244
|
-
self.column_headers = self.df.columns.values.tolist()
|
|
245
|
-
if self.export_flag:
|
|
246
|
-
output_path = define_output_file(self.output_path, self.root, meta_dir_flag = self.meta_dir_flag, output_format = self.output_format)
|
|
247
|
-
if self.output_format == "xlsx":
|
|
248
|
-
export_xl(self.df, output_path)
|
|
249
|
-
elif self.output_format == "csv":
|
|
250
|
-
export_csv(self.df, output_path)
|
|
251
|
-
elif self.input:
|
|
252
|
-
if self.input.endswith('xlsx'):
|
|
253
|
-
self.df = pd.read_excel(self.input)
|
|
254
|
-
elif self.input.endswith('csv'):
|
|
255
|
-
self.df = pd.read_csv(self.input)
|
|
256
|
-
self.column_headers = self.df.columns.values.tolist()
|
|
257
|
-
self.set_input_flags()
|
|
258
|
-
else:
|
|
259
|
-
self.df = None
|
|
260
|
-
self.column_headers = None
|
|
261
|
-
|
|
262
|
-
def clear_opex(self):
|
|
263
|
-
walk = list(os.walk(self.root))
|
|
264
|
-
for dir, _, files in walk[::-1]:
|
|
265
|
-
for file in files:
|
|
266
|
-
file_path = win_256_check(os.path.join(dir, file))
|
|
267
|
-
if str(file_path).endswith('.opex'):
|
|
268
|
-
os.remove(file_path)
|
|
269
|
-
print(f'Cleared Opex: {file_path}')
|
|
270
|
-
|
|
271
|
-
def set_input_flags(self):
|
|
272
|
-
if 'Title' in self.column_headers:
|
|
273
|
-
self.title_flag = True
|
|
274
|
-
if 'Description' in self.column_headers:
|
|
275
|
-
self.description_flag = True
|
|
276
|
-
if 'Security' in self.column_headers:
|
|
277
|
-
self.security_flag = True
|
|
278
|
-
if 'SourceID' in self.column_headers:
|
|
279
|
-
self.sourceid_flag = True
|
|
280
|
-
if 'Ignore' in self.column_headers:
|
|
281
|
-
self.ignore_flag = True
|
|
282
|
-
if 'Hash' in self.column_headers and 'Algorithm' in self.column_headers:
|
|
283
|
-
self.hash_from_spread = True
|
|
284
|
-
print("Hash detected in Spreadsheet; taking hashes from spreadsheet")
|
|
285
|
-
time.sleep(3)
|
|
286
|
-
|
|
287
|
-
def print_descriptive_xmls(self):
|
|
288
|
-
for file in os.scandir(self.metadata_dir):
|
|
289
|
-
path = os.path.join(self.metadata_dir, file.name)
|
|
290
|
-
print(path)
|
|
291
|
-
xml_file = ET.parse(path)
|
|
292
|
-
root_element = ET.QName(xml_file.find('.'))
|
|
293
|
-
root_element_ln = root_element.localname
|
|
294
|
-
for elem in xml_file.findall(".//"):
|
|
295
|
-
elem_path = xml_file.getelementpath(elem)
|
|
296
|
-
elem = ET.QName(elem)
|
|
297
|
-
elem_lnpath = elem_path.replace(f"{{{elem.namespace}}}", root_element_ln + ":")
|
|
298
|
-
print(elem_lnpath)
|
|
299
275
|
|
|
300
276
|
def init_generate_descriptive_metadata(self):
|
|
301
277
|
self.xml_files = []
|
|
@@ -329,15 +305,19 @@ class OpexManifestGenerator():
|
|
|
329
305
|
if len(list_xml) > 0:
|
|
330
306
|
self.xml_files.append({'data': list_xml, 'localname': root_element_ln, 'xmlfile': path})
|
|
331
307
|
|
|
332
|
-
def generate_descriptive_metadata(self,
|
|
308
|
+
def generate_descriptive_metadata(self, xml_desc_elem: ET.Element, idx: pd.Index):
|
|
309
|
+
"""
|
|
310
|
+
Composes the data into an xml file.
|
|
311
|
+
"""
|
|
333
312
|
for xml_file in self.xml_files:
|
|
334
313
|
list_xml = xml_file.get('data')
|
|
335
314
|
localname = xml_file.get('localname')
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
315
|
+
if len(list_xml) == 0:
|
|
316
|
+
pass
|
|
317
|
+
else:
|
|
318
|
+
if idx.empty:
|
|
319
|
+
pass
|
|
320
|
+
else:
|
|
341
321
|
xml_new = ET.parse(xml_file.get('xmlfile'))
|
|
342
322
|
for elem_dict in list_xml:
|
|
343
323
|
name = elem_dict.get('Name')
|
|
@@ -345,21 +325,28 @@ class OpexManifestGenerator():
|
|
|
345
325
|
ns = elem_dict.get('Namespace')
|
|
346
326
|
try:
|
|
347
327
|
if self.metadata_flag in {'e', 'exact'}:
|
|
348
|
-
val = self.df.loc[idx
|
|
328
|
+
val = check_nan(self.df[path].loc[idx].item())
|
|
349
329
|
elif self.metadata_flag in {'f', 'flat'}:
|
|
350
|
-
val = self.df.loc[idx
|
|
351
|
-
if
|
|
330
|
+
val = check_nan(self.df[name].loc[idx].item())
|
|
331
|
+
if val is None:
|
|
352
332
|
continue
|
|
353
333
|
else:
|
|
354
334
|
if is_datetime64_any_dtype(str(val)):
|
|
355
335
|
val = pd.to_datetime(val)
|
|
356
|
-
val = datetime.strftime(val, "%Y-%m-%dT%H
|
|
336
|
+
val = datetime.datetime.strftime(val, "%Y-%m-%dT%H:%M:%S.000Z")
|
|
337
|
+
if self.metadata_flag in {'e','exact'}:
|
|
338
|
+
n = path.replace(localname + ":", f"{{{ns}}}")
|
|
339
|
+
elem = xml_new.find(f'/{n}')
|
|
340
|
+
elif self.metadata_flag in {'f', 'flat'}:
|
|
341
|
+
n = name.split(':')[-1]
|
|
342
|
+
elem = xml_new.find(f'//{{{ns}}}{n}')
|
|
343
|
+
elem.text = str(val)
|
|
357
344
|
except KeyError as e:
|
|
358
345
|
print('Key Error: please ensure column header\'s are an exact match...')
|
|
359
346
|
print(f'Missing Column: {e}')
|
|
360
347
|
print('Alternatively use flat mode...')
|
|
361
348
|
time.sleep(3)
|
|
362
|
-
raise
|
|
349
|
+
raise SystemExit()
|
|
363
350
|
except IndexError as e:
|
|
364
351
|
print("""Index Error; it is likely you have removed or added a file/folder to the directory \
|
|
365
352
|
after generating the spreadsheet. An opex will still be generated but with no xml metadata. \
|
|
@@ -367,25 +354,11 @@ class OpexManifestGenerator():
|
|
|
367
354
|
print(f'Error: {e}')
|
|
368
355
|
time.sleep(3)
|
|
369
356
|
break
|
|
370
|
-
|
|
371
|
-
continue
|
|
372
|
-
if self.metadata_flag in {'e','exact'}:
|
|
373
|
-
n = path.replace(localname + ":", f"{{{ns}}}")
|
|
374
|
-
elem = xml_new.find(f'/{n}')
|
|
375
|
-
elif self.metadata_flag in {'f', 'flat'}:
|
|
376
|
-
n = name.split(':')[-1]
|
|
377
|
-
elem = xml_new.find(f'//{{{ns}}}{n}')
|
|
378
|
-
elem.text = str(val)
|
|
379
|
-
xml_desc.append(xml_new.find('.'))
|
|
380
|
-
else:
|
|
381
|
-
pass
|
|
382
|
-
else:
|
|
383
|
-
pass
|
|
357
|
+
xml_desc_elem.append(xml_new.find('.'))
|
|
384
358
|
|
|
385
|
-
def generate_opex_properties(self, xmlroot: ET.Element, idx: int,
|
|
386
|
-
|
|
359
|
+
def generate_opex_properties(self, xmlroot: ET.Element, idx: int, title: str = None,
|
|
360
|
+
description: str = None, security: str = None):
|
|
387
361
|
self.properties = ET.SubElement(xmlroot, f"{{{self.opexns}}}Properties")
|
|
388
|
-
self.identifiers = ET.SubElement(self.properties, f"{{{self.opexns}}}Identifiers")
|
|
389
362
|
if title:
|
|
390
363
|
self.titlexml = ET.SubElement(self.properties, f"{{{self.opexns}}}Title")
|
|
391
364
|
self.titlexml.text = str(title)
|
|
@@ -395,12 +368,9 @@ class OpexManifestGenerator():
|
|
|
395
368
|
if security:
|
|
396
369
|
self.securityxml = ET.SubElement(self.properties, f"{{{self.opexns}}}SecurityDescriptor")
|
|
397
370
|
self.securityxml.text = str(security)
|
|
398
|
-
if self.autoclass_flag in {"generic", "g"}:
|
|
399
|
-
self.
|
|
400
|
-
elif self.autoclass_flag not in {"generic", "g"} or self.input:
|
|
371
|
+
if self.autoclass_flag not in {"generic", "g"} or self.input:
|
|
372
|
+
self.identifiers = ET.SubElement(self.properties, f"{{{self.opexns}}}Identifiers")
|
|
401
373
|
self.ident_df_lookup(idx)
|
|
402
|
-
if self.identifiers is None:
|
|
403
|
-
self.properties.remove(self.identifiers)
|
|
404
374
|
if self.properties is None:
|
|
405
375
|
xmlroot.remove(self.properties)
|
|
406
376
|
|
|
@@ -423,7 +393,7 @@ class OpexManifestGenerator():
|
|
|
423
393
|
if self.autoclass_flag or self.algorithm or self.input:
|
|
424
394
|
pass
|
|
425
395
|
else:
|
|
426
|
-
self.
|
|
396
|
+
print_running_time(self.start_time)
|
|
427
397
|
print('Cleared OPEXES. No additional arguments passed, so ending program.'); time.sleep(3)
|
|
428
398
|
raise SystemExit()
|
|
429
399
|
if self.empty_flag:
|
|
@@ -437,7 +407,7 @@ class OpexManifestGenerator():
|
|
|
437
407
|
if self.algorithm:
|
|
438
408
|
output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Fixities", output_format = "txt")
|
|
439
409
|
export_list_txt(self.list_fixity, output_path)
|
|
440
|
-
self.
|
|
410
|
+
print_running_time(self.start_time)
|
|
441
411
|
|
|
442
412
|
class OpexDir(OpexManifestGenerator):
|
|
443
413
|
def __init__(self, OMG: OpexManifestGenerator, folder_path: str, title: str = None, description: str = None, security: str = None):
|
|
@@ -448,19 +418,17 @@ class OpexDir(OpexManifestGenerator):
|
|
|
448
418
|
self.folder_path = folder_path.replace(u'\\\\?\\', "")
|
|
449
419
|
else:
|
|
450
420
|
self.folder_path = folder_path
|
|
451
|
-
if self.OMG.
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
elif self.OMG.autoclass_flag is None or self.OMG.autoclass_flag in {"g", "generic"}:
|
|
456
|
-
self.index = None
|
|
421
|
+
if self.OMG.autoclass_flag not in {None, "g","generic"}:
|
|
422
|
+
index = self.OMG.index_df_lookup(self.folder_path)
|
|
423
|
+
else:
|
|
424
|
+
index = None
|
|
457
425
|
if self.OMG.ignore_flag or self.OMG.remove_flag:
|
|
458
426
|
if self.OMG.ignore_flag:
|
|
459
|
-
self.ignore = self.OMG.ignore_df_lookup(
|
|
427
|
+
self.ignore = self.OMG.ignore_df_lookup(index)
|
|
460
428
|
if self.ignore:
|
|
461
429
|
return
|
|
462
430
|
if self.OMG.remove_flag:
|
|
463
|
-
self.removal = self.OMG.remove_df_lookup(self.folder_path,
|
|
431
|
+
self.removal = self.OMG.remove_df_lookup(self.folder_path, index)
|
|
464
432
|
if self.removal:
|
|
465
433
|
return
|
|
466
434
|
else:
|
|
@@ -474,7 +442,7 @@ class OpexDir(OpexManifestGenerator):
|
|
|
474
442
|
self.files = ET.SubElement(self.manifest, f"{{{self.opexns}}}Files")
|
|
475
443
|
|
|
476
444
|
if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
|
|
477
|
-
self.title, self.description, self.security = self.OMG.
|
|
445
|
+
self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
|
|
478
446
|
elif self.OMG.autoclass_flag in {"generic", "g", "catalog-generic", "cg", "accession-generic", "ag", "both-generic", "bg"}:
|
|
479
447
|
self.title = os.path.basename(self.folder_path)
|
|
480
448
|
self.description = os.path.basename(self.folder_path)
|
|
@@ -484,40 +452,36 @@ class OpexDir(OpexManifestGenerator):
|
|
|
484
452
|
self.description = description
|
|
485
453
|
self.security = security
|
|
486
454
|
if self.OMG.sourceid_flag:
|
|
487
|
-
self.OMG.sourceid_df_lookup(self.transfer, self.folder_path,
|
|
455
|
+
self.OMG.sourceid_df_lookup(self.transfer, self.folder_path, index)
|
|
488
456
|
if self.OMG.autoclass_flag or self.OMG.input:
|
|
489
|
-
self.OMG.generate_opex_properties(self.xmlroot,
|
|
457
|
+
self.OMG.generate_opex_properties(self.xmlroot, index,
|
|
490
458
|
title = self.title,
|
|
491
459
|
description = self.description,
|
|
492
460
|
security = self.security)
|
|
493
461
|
if not self.OMG.metadata_flag in {'none', 'n'}:
|
|
494
462
|
self.xml_descmeta = ET.SubElement(self.xmlroot,f"{{{self.opexns}}}DescriptiveMetadata")
|
|
495
|
-
self.OMG.generate_descriptive_metadata(self.xmlroot, idx =
|
|
463
|
+
self.OMG.generate_descriptive_metadata(self.xmlroot, idx = index)
|
|
496
464
|
|
|
497
|
-
def
|
|
465
|
+
def filter_directories(self, directory: str, sort_key: str = str.casefold):
|
|
498
466
|
try:
|
|
499
|
-
if
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
list_directories = sorted([os.path.join(directory, f.name) for f in os.scandir(directory) \
|
|
518
|
-
if f.name != 'meta' \
|
|
519
|
-
and f.name != os.path.basename(__file__)], key=str.casefold)
|
|
520
|
-
return list_directories
|
|
467
|
+
if self.OMG.hidden_flag is False:
|
|
468
|
+
list_directories = sorted([win_256_check(os.path.join(directory, f.name)) for f in os.scandir(directory)
|
|
469
|
+
if not f.name.startswith('.')
|
|
470
|
+
and filter_win_hidden(win_256_check(os.path.join(directory, f.name))) is False
|
|
471
|
+
and f.name != 'meta'
|
|
472
|
+
and f.name != os.path.basename(__file__)],
|
|
473
|
+
key=sort_key)
|
|
474
|
+
elif self.OMG.hidden_flag is True:
|
|
475
|
+
list_directories = sorted([os.path.join(directory, f.name) for f in os.scandir(directory) \
|
|
476
|
+
if f.name != 'meta'
|
|
477
|
+
and f.name != os.path.basename(__file__)],
|
|
478
|
+
key=sort_key)
|
|
479
|
+
return list_directories
|
|
480
|
+
except Exception as e:
|
|
481
|
+
print('Failed to Filter')
|
|
482
|
+
print(e)
|
|
483
|
+
raise SystemError()
|
|
484
|
+
|
|
521
485
|
|
|
522
486
|
def generate_opex_dirs(self, path: str):
|
|
523
487
|
self = OpexDir(self.OMG, path)
|
|
@@ -532,7 +496,7 @@ class OpexDir(OpexManifestGenerator):
|
|
|
532
496
|
self.generate_opex_dirs(f_path)
|
|
533
497
|
else:
|
|
534
498
|
OpexFile(self.OMG, f_path, self.OMG.algorithm)
|
|
535
|
-
if
|
|
499
|
+
if check_opex(opex_path):
|
|
536
500
|
if not self.ignore:
|
|
537
501
|
for f_path in self.filter_directories(path):
|
|
538
502
|
if os.path.isfile(f_path):
|
|
@@ -543,7 +507,7 @@ class OpexDir(OpexManifestGenerator):
|
|
|
543
507
|
file.set("type", "content")
|
|
544
508
|
file.set("size", str(os.path.getsize(f_path)))
|
|
545
509
|
file.text = str(os.path.basename(f_path))
|
|
546
|
-
|
|
510
|
+
write_opex(opex_path, self.xmlroot)
|
|
547
511
|
else:
|
|
548
512
|
print(f"Avoiding override, Opex exists at: {opex_path}")
|
|
549
513
|
|
|
@@ -555,26 +519,32 @@ class OpexFile(OpexManifestGenerator):
|
|
|
555
519
|
self.file_path = file_path.replace(u'\\\\?\\', "")
|
|
556
520
|
else:
|
|
557
521
|
self.file_path = file_path
|
|
558
|
-
if
|
|
559
|
-
if self.OMG.input
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
self.
|
|
522
|
+
if check_opex(self.file_path):
|
|
523
|
+
if any([self.OMG.input,
|
|
524
|
+
self.OMG.autoclass_flag in {"c","catalog","a","accession","b","both","cg","catalog-generic","ag","accession-generic","bg","both-generic"},
|
|
525
|
+
self.OMG.ignore_flag,
|
|
526
|
+
self.OMG.remove_flag,
|
|
527
|
+
self.OMG.sourceid_flag,
|
|
528
|
+
self.OMG.title_flag,
|
|
529
|
+
self.OMG.description_flag,
|
|
530
|
+
self.OMG.security_flag]):
|
|
531
|
+
index = self.OMG.index_df_lookup(self.file_path)
|
|
563
532
|
elif self.OMG.autoclass_flag is None or self.OMG.autoclass_flag in {"g","generic"}:
|
|
564
|
-
|
|
533
|
+
index = None
|
|
565
534
|
if self.OMG.ignore_flag:
|
|
566
|
-
self.ignore = self.OMG.ignore_df_lookup(
|
|
535
|
+
self.ignore = self.OMG.ignore_df_lookup(index)
|
|
567
536
|
if self.ignore:
|
|
537
|
+
#WTF is this?
|
|
568
538
|
return
|
|
569
539
|
if self.OMG.remove_flag:
|
|
570
|
-
removal = self.OMG.remove_df_lookup(self.file_path,
|
|
540
|
+
removal = self.OMG.remove_df_lookup(self.file_path, index)
|
|
571
541
|
if removal:
|
|
572
542
|
return
|
|
573
543
|
else:
|
|
574
544
|
self.ignore = False
|
|
575
545
|
self.algorithm = algorithm
|
|
576
546
|
if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
|
|
577
|
-
self.title, self.description, self.security = self.OMG.
|
|
547
|
+
self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
|
|
578
548
|
elif self.OMG.autoclass_flag in {"generic", "g", "catalog-generic", "cg", "accession-generic", "ag", "both-generic", "bg"}:
|
|
579
549
|
self.title = os.path.splitext(os.path.basename(self.file_path))[0]
|
|
580
550
|
self.description = os.path.splitext(os.path.basename(self.file_path))[0]
|
|
@@ -591,20 +561,20 @@ class OpexFile(OpexManifestGenerator):
|
|
|
591
561
|
if self.OMG.algorithm:
|
|
592
562
|
self.fixities = ET.SubElement(self.transfer, f"{{{self.opexns}}}Fixities")
|
|
593
563
|
if self.OMG.hash_from_spread:
|
|
594
|
-
self.OMG.hash_df_lookup(self.
|
|
564
|
+
self.OMG.hash_df_lookup(self.fixities, index)
|
|
595
565
|
else:
|
|
596
566
|
self.genererate_opex_fixity(self.file_path)
|
|
597
567
|
if self.transfer is None:
|
|
598
568
|
self.xmlroot.remove(self.transfer)
|
|
599
569
|
if self.OMG.autoclass_flag or self.OMG.input:
|
|
600
|
-
self.OMG.generate_opex_properties(self.xmlroot,
|
|
570
|
+
self.OMG.generate_opex_properties(self.xmlroot, index,
|
|
601
571
|
title = self.title,
|
|
602
572
|
description = self.description,
|
|
603
573
|
security = self.security)
|
|
604
574
|
if not self.OMG.metadata_flag in {'none','n'}:
|
|
605
575
|
self.xml_descmeta = ET.SubElement(self.xmlroot, f"{{{self.opexns}}}DescriptiveMetadata")
|
|
606
|
-
self.OMG.generate_descriptive_metadata(self.xml_descmeta,
|
|
607
|
-
opex_path =
|
|
576
|
+
self.OMG.generate_descriptive_metadata(self.xml_descmeta, index)
|
|
577
|
+
opex_path = write_opex(self.file_path, self.xmlroot)
|
|
608
578
|
if self.OMG.zip_flag:
|
|
609
579
|
zip_opex(self.file_path, opex_path)
|
|
610
580
|
else:
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
[options]
|
|
2
|
+
|
|
3
|
+
INDEX_FIELD = "FullName"
|
|
4
|
+
TITLE_FIELD = "Title"
|
|
5
|
+
DESCRIPTION_FIELD = "Description"
|
|
6
|
+
SECUIRTY_FIELD = "Security"
|
|
7
|
+
IDENTIFIER_FIELD = "Identifier"
|
|
8
|
+
IDENTIFIER_DEFAULT = "code"
|
|
9
|
+
REMOVAL_FIELD = "Removals"
|
|
10
|
+
IGNORE_FIELD = "Ignore"
|
|
11
|
+
SOURCEID_FIELD = "SourceID"
|
|
12
|
+
HASH_FIELD = "Hash"
|
|
13
|
+
ALGORITHM_FIELD = "Algorithm"
|
{opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: opex_manifest_generator
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.11
|
|
4
4
|
Summary: Opex Manifest Generator Tool for use with Opex / Preservica
|
|
5
5
|
Author-email: Christopher Prince <c.pj.prince@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/CPJPRINCE/opex_manifest_generator
|
{opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/RECORD
RENAMED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
opex_manifest_generator/__init__.py,sha256=fsN-dLjNMn-AfjeG2jkryzIa69bY09B5xLTqWedOv_w,461
|
|
2
|
-
opex_manifest_generator/cli.py,sha256=
|
|
3
|
-
opex_manifest_generator/common.py,sha256=
|
|
2
|
+
opex_manifest_generator/cli.py,sha256=6-ZbFZQT82aUYA5aNYLV4xteEw-pMgXsQr_yOCDklYM,10121
|
|
3
|
+
opex_manifest_generator/common.py,sha256=dJGRQicA7B9n5WXpG5pJlV0fzO0itF6GyDXWTJ-5mxs,1782
|
|
4
4
|
opex_manifest_generator/hash.py,sha256=mpLP-BqqDC7BuQQ5TEc4jaIBGabW-qVr128JaZq54mY,992
|
|
5
|
-
opex_manifest_generator/opex_manifest.py,sha256=
|
|
5
|
+
opex_manifest_generator/opex_manifest.py,sha256=NkvgVR2C5QIJPq3kIcjjAURh3uaO4P2CGpEiIP76N6Q,29209
|
|
6
|
+
opex_manifest_generator/options.properties,sha256=wLa_PjlOjzH1t88edz18wgTFC6F0it8ImPK9xUrQo9g,308
|
|
6
7
|
opex_manifest_generator/metadata/DublinCore Template.xml,sha256=csNGXzSH27Whs4BQNuwMZl8nLSdDq7Y_OblTfzeBqWQ,775
|
|
7
8
|
opex_manifest_generator/metadata/EAD Template.xml,sha256=OsWjUfKiLAsc1zqm56auyFSDYMuZpu6a49AFgqZNzlU,2167
|
|
8
9
|
opex_manifest_generator/metadata/GDPR Template.xml,sha256=r6WTATRVt1sr7VCdaXOwz4vLgU3IgFFx7oRmPnORBWc,475
|
|
@@ -13,9 +14,9 @@ opex_manifest_generator/samples/spreads/dctemplate.xlsx,sha256=R33CNcBK3s-KFYEhm
|
|
|
13
14
|
opex_manifest_generator/samples/spreads/eadtemplate.xlsx,sha256=4lrP0LLZXwv73fl3fvqr7yqpdDJuRj2D4ZADQ4OS6Ps,19299
|
|
14
15
|
opex_manifest_generator/samples/spreads/gdprtemplate.xlsx,sha256=3k6FpN6n83yF5wYd64Yy8Rxv2b1Z497icIB8UBGDW-M,18662
|
|
15
16
|
opex_manifest_generator/samples/spreads/modstemplate.xlsx,sha256=5kNpp4Cju_POvnTrgFk34OJVe5yc6o3R4ZNX2TT8zAc,19509
|
|
16
|
-
opex_manifest_generator-1.1.
|
|
17
|
-
opex_manifest_generator-1.1.
|
|
18
|
-
opex_manifest_generator-1.1.
|
|
19
|
-
opex_manifest_generator-1.1.
|
|
20
|
-
opex_manifest_generator-1.1.
|
|
21
|
-
opex_manifest_generator-1.1.
|
|
17
|
+
opex_manifest_generator-1.1.11.dist-info/LICENSE.md,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
18
|
+
opex_manifest_generator-1.1.11.dist-info/METADATA,sha256=qYpxKL4BOjXcYip-W6Z_jDA-FXs22O65G9IYTcU6J9I,687
|
|
19
|
+
opex_manifest_generator-1.1.11.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
|
20
|
+
opex_manifest_generator-1.1.11.dist-info/entry_points.txt,sha256=WGMc3hWlqOsQ8DcTuy8-DyBbJKkWNImT4J1FasVDHts,70
|
|
21
|
+
opex_manifest_generator-1.1.11.dist-info/top_level.txt,sha256=K48eGnaDLVO6YDJdAZLqbeoZvJHBGX25cvYT-i8gWt0,24
|
|
22
|
+
opex_manifest_generator-1.1.11.dist-info/RECORD,,
|
{opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|
{opex_manifest_generator-1.1.10.dist-info → opex_manifest_generator-1.1.11.dist-info}/top_level.txt
RENAMED
|
File without changes
|