opex-manifest-generator 1.3.3-py3-none-any.whl → 1.3.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opex_manifest_generator/cli.py +134 -43
- opex_manifest_generator/common.py +31 -20
- opex_manifest_generator/hash.py +30 -14
- opex_manifest_generator/opex_manifest.py +545 -323
- {opex_manifest_generator-1.3.3.dist-info → opex_manifest_generator-1.3.4.dist-info}/METADATA +3 -1
- opex_manifest_generator-1.3.4.dist-info/RECORD +16 -0
- {opex_manifest_generator-1.3.3.dist-info → opex_manifest_generator-1.3.4.dist-info}/WHEEL +1 -1
- opex_manifest_generator-1.3.3.dist-info/RECORD +0 -16
- {opex_manifest_generator-1.3.3.dist-info → opex_manifest_generator-1.3.4.dist-info}/entry_points.txt +0 -0
- {opex_manifest_generator-1.3.3.dist-info → opex_manifest_generator-1.3.4.dist-info}/licenses/LICENSE.md +0 -0
- {opex_manifest_generator-1.3.3.dist-info → opex_manifest_generator-1.3.4.dist-info}/top_level.txt +0 -0
@@ -7,15 +7,30 @@ author: Christopher Prince
 license: Apache License 2.0"
 """
 
-import
+from lxml import etree as ET
 import pandas as pd
-import os,
+import os, configparser, logging, zipfile
+from typing import Optional
 from auto_reference_generator import ReferenceGenerator
-from auto_reference_generator.common import export_list_txt,
+from auto_reference_generator.common import export_list_txt, \
+    export_xl, \
+    export_csv, \
+    export_json, \
+    export_ods, \
+    export_xml, \
+    define_output_file
 from pandas.api.types import is_datetime64_any_dtype
 from opex_manifest_generator.hash import HashGenerator
-from opex_manifest_generator.common import
-
+from opex_manifest_generator.common import zip_opex,\
+    remove_tree,\
+    win_256_check,\
+    filter_win_hidden,\
+    check_nan,\
+    check_opex,\
+    write_opex
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
 
 class OpexManifestGenerator():
     """
@@ -53,7 +68,7 @@ class OpexManifestGenerator():
                  output_path: str = os.getcwd(),
                  meta_dir_flag: bool = True,
                  metadata_dir: str = os.path.join(os.path.dirname(os.path.realpath(__file__)), "metadata"),
-                 metadata_flag: str =
+                 metadata_flag: Optional[str] = None,
                  autoref_flag: str = None,
                  prefix: str = None,
                  suffix: str = None,
@@ -65,19 +80,21 @@ class OpexManifestGenerator():
                  pax_fixity: bool = False,
                  fixity_export_flag: bool = True,
                  empty_flag: bool = False,
+                 empty_export_flag: bool = True,
                  removal_flag: bool = False,
+                 removal_export_flag: bool = True,
                  clear_opex_flag: bool = False,
                  export_flag: bool = False,
                  input: str = None,
                  zip_flag: bool = False,
+                 zip_file_removal: bool = False,
                  hidden_flag: bool = False,
                  output_format: str = "xlsx",
-                 print_xmls_flag: bool = False,
                  options_file: str = os.path.join(os.path.dirname(__file__),'options','options.properties'),
                  keywords: list = None,
                  keywords_mode: str = "initialise",
                  keywords_retain_order: bool = False,
-
+                 keywords_case_sensitivity: bool = False,
                  keywords_abbreviation_number: int = 3,
                  sort_key = lambda x: (os.path.isfile(x), str.casefold(x)),
                  delimiter = "/",
@@ -86,7 +103,7 @@ class OpexManifestGenerator():
         self.root = os.path.abspath(root)
         # Base Parameters
         self.opexns = "http://www.openpreservationexchange.org/opex/v1.2"
-        self.start_time = datetime.
+        self.start_time = datetime.now()
         self.list_path = []
         self.list_fixity = []
 
@@ -99,17 +116,20 @@ class OpexManifestGenerator():
         self.meta_dir_flag = meta_dir_flag
         self.hidden_flag = hidden_flag
         self.zip_flag = zip_flag
+        self.zip_file_removal = zip_file_removal
+
         self.empty_flag = empty_flag
+        self.empty_export_flag = empty_export_flag
 
         # Parameters for Input Option
         self.input = input
         self.removal_flag = removal_flag
         if self.removal_flag:
             self.removal_list = []
+        self.removal_export_flag = removal_export_flag
         self.export_flag = export_flag
         self.metadata_flag = metadata_flag
         self.metadata_dir = metadata_dir
-        self.print_xmls_flag = print_xmls_flag
 
         # Parameters for Auto Reference
         self.autoref_flag = autoref_flag
@@ -123,7 +143,7 @@ class OpexManifestGenerator():
         self.keywords_list = keywords
         self.keywords_mode = keywords_mode
         self.keywords_retain_order = keywords_retain_order
-        self.
+        self.keywords_case_sensitivity = keywords_case_sensitivity
         self.keywords_abbreviation_number = keywords_abbreviation_number
         self.sort_key = sort_key
         self.delimiter = delimiter
@@ -141,146 +161,198 @@ class OpexManifestGenerator():
 
     def parse_config(self, options_file: str = os.path.join('options','options.properties')) -> None:
         config = configparser.ConfigParser()
-        config.read(options_file, encoding='utf-8')
-
-
-        global TITLE_FIELD
-        TITLE_FIELD = config['options']['TITLE_FIELD']
-        global DESCRIPTION_FIELD
-        DESCRIPTION_FIELD = config['options']['DESCRIPTION_FIELD']
-        global SECURITY_FIELD
-        SECURITY_FIELD = config['options']['SECURITY_FIELD']
-        global IDENTIFIER_FIELD
-        IDENTIFIER_FIELD = config['options']['IDENTIFIER_FIELD']
-        global IDENTIFIER_DEFAULT
-        IDENTIFIER_DEFAULT = config['options']['IDENTIFIER_DEFAULT']
-        global REMOVAL_FIELD
-        REMOVAL_FIELD = config['options']['REMOVAL_FIELD']
-        global IGNORE_FIELD
-        IGNORE_FIELD = config['options']['IGNORE_FIELD']
-        global SOURCEID_FIELD
-        SOURCEID_FIELD = config['options']['SOURCEID_FIELD']
-        global HASH_FIELD
-        HASH_FIELD = config['options']['HASH_FIELD']
-        global ALGORITHM_FIELD
-        ALGORITHM_FIELD = config['options']['ALGORITHM_FIELD']
-        global ARCREF_FIELD
-        ARCREF_FIELD = config['options']['ARCREF_FIELD']
-        global ACCREF_CODE
-        ACCREF_CODE = config['options']['ACCREF_CODE']
-        global ACCREF_FIELD
-        ACCREF_FIELD = config['options']['ACCREF_FIELD']
+        read_config = config.read(options_file, encoding='utf-8')
+        if not read_config:
+            logger.warning(f"Options files not found or not reable: {options_file}. Using defaults.")
 
-
-        FIXITY_SUFFIX = config['options']['FIXITY_SUFFIX']
-        global REMOVALS_SUFFIX
-        REMOVALS_SUFFIX = config['options']['REMOVALS_SUFFIX']
-        global METAFOLDER
-        METAFOLDER = config['options']['METAFOLDER']
-        global GENERIC_DEFAULT_SECURITY
-        GENERIC_DEFAULT_SECURITY = config['options']['GENERIC_DEFAULT_SECURITY']
+        section = config['options'] if 'options' in config else {}
 
+        self.INDEX_FIELD = section.get('INDEX_FIELD', "FullName")
+        self.TITLE_FIELD = section.get('TITLE_FIELD', "Title")
+        self.DESCRIPTION_FIELD = section.get('DESCRIPTION_FIELD', "Description")
+        self.SECURITY_FIELD = section.get('SECURITY_FIELD', "Security")
+        self.IDENTIFIER_FIELD = section.get('IDENTIFIER_FIELD', "Identifier")
+        self.IDENTIFIER_DEFAULT = section.get('IDENTIFIER_DEFAULT', "code")
+        self.REMOVAL_FIELD = section.get('REMOVAL_FIELD', "Removals")
+        self.IGNORE_FIELD = section.get('IGNORE_FIELD', "Ignore")
+        self.SOURCEID_FIELD = section.get('SOURCEID_FIELD', "SourceID")
+        self.HASH_FIELD = section.get('HASH_FIELD', "Hash")
+        self.ALGORITHM_FIELD = section.get('ALGORITHM_FIELD', "Algorithm")
+        self.ARCREF_FIELD = section.get('ARCREF_FIELD', "Archive_Reference")
+        self.ACCREF_CODE = section.get('ACCREF_CODE', "Accession_Reference")
+        self.ACCREF_FIELD = section.get('ACCREF_FIELD', "accref")
+        self.FIXITY_SUFFIX = section.get('FIXITY_SUFFIX', "_Fixity")
+        self.REMOVALS_SUFFIX = section.get('REMOVALS_SUFFIX', "_Removals")
+        self.METAFOLDER = section.get('METAFOLDER', "meta")
+        self.GENERIC_DEFAULT_SECURITY = section.get('GENERIC_DEFAULT_SECURITY', "open")
+        logger.debug(f'Configuration set to: {[{k,v} for k,v in (section.items())]}')
 
     def print_descriptive_xmls(self) -> None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            for file in os.scandir(self.metadata_dir):
+                path = os.path.join(self.metadata_dir, file.name)
+                print(path)
+                xml_file = ET.parse(path)
+                root_element = ET.QName(xml_file.find('.'))
+                root_element_ln = root_element.localname
+                for elem in xml_file.findall(".//"):
+                    if elem.getchildren():
+                        pass
+                    else:
+                        elem_path = xml_file.getelementpath(elem)
+                        elem = ET.QName(elem)
+                        elem_lnpath = elem_path.replace(f"{{{elem.namespace}}}", root_element_ln + ":")
+                        print(elem_lnpath)
+        except Exception as e:
+            logger.exception(f'Failed to print Descriptive metadta files, ensure correct path {e}')
+            raise
 
+    def convert_descriptive_xmls(self) -> None:
+        try:
+            for file in os.scandir(self.metadata_dir):
+                path = os.path.join(self.metadata_dir, file.name)
+                xml_file = ET.parse(path)
+                root_element = ET.QName(xml_file.find('.'))
+                root_element_ln = root_element.localname
+                column_list = []
+                for elem in xml_file.findall(".//"):
+                    if elem.getchildren():
+                        pass
+                    else:
+                        elem_path = xml_file.getelementpath(elem)
+                        elem = ET.QName(elem)
+                        elem_lnpath = elem_path.replace(f"{{{elem.namespace}}}", root_element_ln + ":")
+                        column_list.append(elem_lnpath)
+                df = pd.DataFrame(columns=column_list,index=None)
+                if self.output_format == 'xlsx':
+                    export_xl(df,file.name.replace('.xml','.xlsx'))
+                elif self.output_format == 'ods':
+                    export_ods(df,file.name.replace('.xml','.ods'))
+                elif self.output_format == 'csv':
+                    export_csv(df,file.name.replace('.xml','.csv'))
+                elif self.output_format == 'json':
+                    export_json(df,file.name.replace('.xml','.json'))
+                else:
+                    export_xl(df, file.name.replace('.xml','.xlsx'))
+        except Exception as e:
+            logger.exception(f'Failed to print Descriptive metadta files, ensure correct path {e}')
+            raise
+
     def set_input_flags(self) -> None:
-        if TITLE_FIELD in self.column_headers:
+        if self.TITLE_FIELD in self.column_headers:
             self.title_flag = True
-        if DESCRIPTION_FIELD in self.column_headers:
+        if self.DESCRIPTION_FIELD in self.column_headers:
             self.description_flag = True
-        if SECURITY_FIELD in self.column_headers:
+        if self.SECURITY_FIELD in self.column_headers:
             self.security_flag = True
-        if SOURCEID_FIELD in self.column_headers:
+        if self.SOURCEID_FIELD in self.column_headers:
             self.sourceid_flag = True
-        if IGNORE_FIELD in self.column_headers:
+        if self.IGNORE_FIELD in self.column_headers:
             self.ignore_flag = True
-        if HASH_FIELD in self.column_headers and ALGORITHM_FIELD in self.column_headers:
+        if self.HASH_FIELD in self.column_headers and self.ALGORITHM_FIELD in self.column_headers:
             self.hash_from_spread = True
-
-
+            logger.info("Hash detected in Spreadsheet; taking hashes from spreadsheet")
+        logger.debug("Flags set")
 
     def init_df(self) -> None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        self.
-
-        self.
-
-        self.
-
-        self.
-
-        self.
-
-
-
-
-
-
+        try:
+            if self.autoref_flag:
+                ar = ReferenceGenerator(self.root,
+                                        output_path = self.output_path,
+                                        prefix = self.prefix,
+                                        accprefix = self.acc_prefix,
+                                        suffix = self.suffix,
+                                        suffix_options = self.suffix_option,
+                                        start_ref = self.start_ref,
+                                        empty_flag = self.empty_flag,
+                                        accession_flag=self.accession_mode,
+                                        keywords = self.keywords_list,
+                                        keywords_mode = self.keywords_mode,
+                                        keywords_retain_order = self.keywords_retain_order,
+                                        keywords_abbreviation_number = self.keywords_abbreviation_number,
+                                        keywords_case_sensitivity = self.keywords_case_sensitivity,
+                                        delimiter = self.delimiter,
+                                        sort_key = self.sort_key)
+                self.df = ar.init_dataframe()
+                if self.autoref_flag in {"accession", "a", "accession-generic", "ag"}:
+                    self.df = self.df.drop(self.ARCREF_FIELD, axis=1)
+                self.column_headers = self.df.columns.values.tolist()
+                self.set_input_flags()
+                if self.export_flag:
+                    output_path = define_output_file(self.output_path, self.root, self.METAFOLDER, meta_dir_flag = self.meta_dir_flag, output_format = self.output_format)
+                    if self.output_format == "xlsx":
+                        export_xl(self.df, output_path)
+                    elif self.output_format == "csv":
+                        export_csv(self.df, output_path)
+                    elif self.output_format == "json":
+                        export_json(self.df.to_dict(orient='records'), output_path)
+                    elif self.output_format == "ods":
+                        export_ods(self.df, output_path)
+                    elif self.output_format == "xml":
+                        export_xml(self.df, output_path)
+                logger.debug(f'Auto Reference Dataframe initialised with columns: {self.column_headers}')
+                return True
+            elif self.input:
+                if self.input.endswith(('.xlsx','.xls','.xlsm')):
+                    self.df = pd.read_excel(self.input)
+                elif self.input.endswith('.csv'):
+                    self.df = pd.read_csv(self.input)
+                elif self.input.endswith('.json'):
+                    self.df = pd.read_json(self.input)
+                elif self.input.endswith('.ods'):
+                    self.df = pd.read_excel(self.input, engine='odf')
+                elif self.input.endswith('.xml'):
+                    self.df = pd.read_xml(self.input)
+                self.column_headers = self.df.columns.values.tolist()
+                self.set_input_flags()
+                logger.debug(f'Input Dataframe initialised with columns: {self.column_headers}')
+                return True
+            else:
+                logger.warning('No Auto Reference or Input file specified, proceeding without Dataframe')
+                self.df = None
+                self.column_headers = None
+                return False
+        except Exception as e:
+            logger.exception(f'Failed to intialise Dataframe: {e}')
+            raise
+
     def clear_opex(self) -> None:
-
-
-        for
-
-
-
-
+        try:
+            walk = list(os.walk(self.root))
+            for dir, _, files in walk[::-1]:
+                for file in files:
+                    file_path = win_256_check(os.path.join(dir, file))
+                    if str(file_path).endswith('.opex'):
+                        os.remove(file_path)
+                        logger.info(f'Cleared Opex: {file_path}')
+        except Exception as e:
+            logger.exception(f'Error looking up Clearing Opex: {e}')
+            raise
 
     def index_df_lookup(self, path: str) -> pd.Index:
-
-
+        if getattr(self, 'df', None) is None:
+            logger.error('Dataframe not initialised, cannot perform lookup')
+            raise RuntimeError('Dataframe not initialised, cannot perform lookup')
+        try:
+            idx = self.df.loc[self.df[self.INDEX_FIELD] == path, self.INDEX_FIELD].index
+            return idx
+        except KeyError as e:
+            logger.exception(f'Key Error in Index Lookup: {e}' \
+                '\n Please ensure column header\'s are an exact match.')
+            raise
+        except IndexError as e:
+            logger.warning(f'Index Error in Index Lookup: {e}. Proceeding...' \
+                '\nIt is likely you have removed or added a file/folder to the directory' \
+                '\nafter generating your input spreadsheet. An opex will still be generated but information may be missing.' \
+                '\nTo ensure metadata match up please regenerate the spreadsheet.')
+        except Exception as e:
+            logger.exception(f'Error looking up Index from Dataframe: {e}')
+            raise
 
     def xip_df_lookup(self, idx: pd.Index) -> tuple:
+        if getattr(self, 'df', None) is None:
+            logger.error('Dataframe not initialised, cannot perform lookup')
+            raise RuntimeError('Dataframe not initialised, cannot perform lookup')
         try:
             title = None
             description = None
@@ -289,139 +361,247 @@ class OpexManifestGenerator():
                 pass
             else:
                 if self.title_flag:
-                    title = check_nan(self.df.loc[idx,TITLE_FIELD].item())
+                    title = check_nan(self.df.loc[idx,self.TITLE_FIELD].item())
                 if self.description_flag:
-                    description = check_nan(self.df.loc[idx,DESCRIPTION_FIELD].item())
+                    description = check_nan(self.df.loc[idx,self.DESCRIPTION_FIELD].item())
                 if self.security_flag:
-                    security = check_nan(self.df.loc[idx,SECURITY_FIELD].item())
+                    security = check_nan(self.df.loc[idx,self.SECURITY_FIELD].item())
             return title,description,security
+        except KeyError as e:
+            logger.exception(f'Key Error in Removal Lookup: {e}'
+                '\n Please ensure column header\'s are an exact match.')
+            raise
+        except IndexError as e:
+            logger.warning(f'Index Error in Removal Lookup: {e}. Proceeding...'
+                '\nIt is likely you have removed or added a file/folder to the directory'
+                '\nafter generating your input spreadsheet. An opex will still be generated, but information may be missing.'
+                '\nTo ensure metadata match up please regenerate the spreadsheet.')
         except Exception as e:
-
-
+            logger.exception(f'Error looking up XIP from Dataframe: {e}')
+            raise
 
     def removal_df_lookup(self, idx: pd.Index) -> bool:
+        if getattr(self, 'df', None) is None:
+            logger.error('Dataframe not initialised, cannot perform lookup')
+            raise RuntimeError('Dataframe not initialised, cannot perform lookup')
         try:
             if idx.empty:
                 return False
             else:
-                remove = check_nan(self.df.loc[idx,REMOVAL_FIELD].item())
+                remove = check_nan(self.df.loc[idx,self.REMOVAL_FIELD].item())
                 if remove is not None:
                     return True
                 else:
                     return False
+        except KeyError as e:
+            logger.exception(f'Key Error in Removal Lookup: {e}'
+                '\n Please ensure column header\'s are an exact match.')
+            raise
+        except IndexError as e:
+            logger.warning(f'Index Error in Removal Lookup: {e}. Proceeding...'
+                '\nIt is likely you have removed or added a file/folder to the directory'
+                '\nafter generating your input spreadsheet. An opex will still be generated, but information may be missing.'
+                '\nTo ensure metadata match up please regenerate the spreadsheet.')
         except Exception as e:
-
-
+            logger.exception(f'Error looking up Removals from Dataframe: {e}')
+            raise
 
     def ignore_df_lookup(self, idx: pd.Index) -> bool:
+        if getattr(self, 'df', None) is None:
+            logger.error('Dataframe not initialised, cannot perform lookup')
+            raise RuntimeError('Dataframe not initialised, cannot perform lookup')
         try:
             if idx.empty:
                 return False
             else:
-                ignore = check_nan(self.df.loc[idx,IGNORE_FIELD].item())
+                ignore = check_nan(self.df.loc[idx,self.IGNORE_FIELD].item())
                 return bool(ignore)
+        except KeyError as e:
+            logger.exception(f'Key Error in Ignore Lookup: {e}'
+                '\n Please ensure column header\'s are an exact match.')
+            raise
+        except IndexError as e:
+            logger.warning(f'Index Error in Ignore Lookup: {e}. Proceeding...'
+                '\nIt is likely you have removed or added a file/folder to the directory'
+                '\nafter generating your input spreadsheet. An opex will still be generated but information may be missing.'
+                '\nTo ensure metadata match up please regenerate the spreadsheet.')
         except Exception as e:
-
-
+            logger.exception(f'Error looking up Ignore from Dataframe: {e}')
+            return False
 
     def sourceid_df_lookup(self, xml_element: ET.SubElement, idx: pd.Index) -> None:
+        if getattr(self, 'df', None) is None:
+            logger.error('Dataframe not initialised, cannot perform lookup')
+            raise RuntimeError('Dataframe not initialised, cannot perform lookup')
         try:
             if idx.empty:
                 pass
             else:
-                sourceid = check_nan(self.df.loc[idx,SOURCEID_FIELD].item())
+                sourceid = check_nan(self.df.loc[idx,self.SOURCEID_FIELD].item())
                 if sourceid:
                     source_xml = ET.SubElement(xml_element,f"{{{self.opexns}}}SourceID")
                     source_xml.text = str(sourceid)
+        except KeyError as e:
+            logger.exception(f'Key Error in SourceID Lookup: {e}'
+                '\n Please ensure column header\'s are an exact match.')
+            raise
+        except IndexError as e:
+            logger.warning(f'Index Error in SourceID Lookup: {e}. Proceeding...'
+                '\nIt is likely you have removed or added a file/folder to the directory'
+                '\nafter generating your input spreadsheet. An opex will still be generated but information may be missing.'
+                '\nTo ensure metadata match up please regenerate the spreadsheet.')
         except Exception as e:
-
-
+            logger.exception(f'Error looking up SourceID from Dataframe: {e}')
+            raise
 
     def hash_df_lookup(self, xml_fixities: ET.SubElement, idx: pd.Index) -> None:
+        if getattr(self, 'df', None) is None:
+            logger.error('Dataframe not initialised, cannot perform lookup')
+            raise RuntimeError('Dataframe not initialised, cannot perform lookup')
         try:
+            hash_value = None
+            algo_value = None
+            file_path = None
+
             if idx.empty:
-
+                return
             else:
-
-
-
-
-
-
+                # prefer the algorithm specified in the spreadsheet for this row
+                if not self.column_headers or (self.HASH_FIELD not in self.column_headers and self.ALGORITHM_FIELD not in self.column_headers):
+                    return
+                hash_value = check_nan(self.df.loc[idx, self.HASH_FIELD].item())
+                algo_value = check_nan(self.df.loc[idx, self.ALGORITHM_FIELD].item())
+                file_path = check_nan(self.df.loc[idx, self.INDEX_FIELD].item()) if self.INDEX_FIELD in self.column_headers else None
+
+                if algo_value is not None:
+                    self.fixity = ET.SubElement(xml_fixities, f"{{{self.opexns}}}Fixity")
+                    self.fixity.set('type', algo_value)
+                    self.fixity.set('value', str(hash_value))
+                    logger.debug(f'Using Algorithm from Spreadsheet: {algo_value} with Hash: {hash_value}')
+
+                else:
+                    if file_path is not None:
+                        # fallback to configured algorithms
+                        logger.debug('No Algorithm specified in Spreadsheet for this entry; ')
+                        if file_path.endswith('.pax.zip') or file_path.endswith('.pax'):
+                            self.generate_pax_zip_opex_fixity(file_path, self.algorithm, self.list_fixity)
+                        else:
+                            self.generate_opex_fixity(file_path, self.algorithm, self.list_fixity)
+        except KeyError as e:
+            logger.exception(f'Key Error in Hash Lookup: {e}'
+                '\n Please ensure column header\'s are an exact match.')
+            raise
+        except IndexError as e:
+            logger.warning(f'Index Error in Hash Lookup: {e}. Proceeding...'
+                '\nIt is likely you have removed or added a file/folder to the directory'
+                '\nafter generating your input spreadsheet. An opex will still be generated but information may be missing.'
+                '\nTo ensure metadata match up please regenerate the spreadsheet.')
         except Exception as e:
-
-
+            logger.exception(f'Error looking up Hash from Dataframe: {e}')
+            raise
 
     def ident_df_lookup(self, idx: pd.Index, default_key: str = None) -> None:
+        if getattr(self, 'df', None) is None:
+            logger.error('Dataframe not initialised, cannot perform lookup')
+            raise RuntimeError('Dataframe not initialised, cannot perform lookup')
         try:
             if idx.empty:
                 pass
             else:
                 for header in self.column_headers:
                     ident = None
-                    if any(s in header for s in {IDENTIFIER_FIELD,ARCREF_FIELD,ACCREF_FIELD}):
-                        if f'{IDENTIFIER_FIELD}:' in header:
+                    if any(s in header for s in {self.IDENTIFIER_FIELD,self.ARCREF_FIELD,self.ACCREF_FIELD}):
+                        if f'{self.IDENTIFIER_FIELD}:' in header:
                             key_name = str(header).split(':',1)[-1]
-                        elif IDENTIFIER_FIELD in header:
-                            key_name = IDENTIFIER_DEFAULT
-                        elif ARCREF_FIELD in header:
-                            key_name = IDENTIFIER_DEFAULT
-                        elif ACCREF_FIELD in header:
-                            key_name = ACCREF_CODE
+                        elif self.IDENTIFIER_FIELD in header:
+                            key_name = self.IDENTIFIER_DEFAULT
+                        elif self.ARCREF_FIELD in header:
+                            key_name = self.IDENTIFIER_DEFAULT
+                        elif self.ACCREF_FIELD in header:
+                            key_name = self.ACCREF_CODE
                         else:
-                            key_name = IDENTIFIER_DEFAULT
+                            key_name = self.IDENTIFIER_DEFAULT
                         ident = check_nan(self.df.loc[idx,header].item())
                         if ident:
                             self.identifier = ET.SubElement(self.identifiers, f"{{{self.opexns}}}Identifier")
                             self.identifier.set("type", key_name)
                             self.identifier.text = str(ident)
+                            logger.debug(f'Adding Identifer: {header}: {ident}')
+        except KeyError as e:
+            logger.exception(f'Key Error in Identifer Lookup: {e}' \
+                '\n Please ensure column header\'s are an exact match.')
+            raise
+        except IndexError as e:
+            logger.warning(f'Index Error in Identifier Lookup: {e}. Proceeding...' \
+                '\nIt is likely you have removed or added a file/folder to the directory' \
+                '\nafter generating your input spreadsheet. An opex will still be generated but xml information may be missing.' \
+                '\nTo ensure metadata match up please regenerate the spreadsheet.')
         except Exception as e:
-
-
+            logger.exception(f'Error looking up Identifiers: {e}')
+            raise
 
     def init_generate_descriptive_metadata(self) -> None:
-
-
-
-        """
-        Generates info on the elements of the XML Files placed in the Metadata directory.
-        Composed as a list of dictionaries.
-        """
-        path = os.path.join(self.metadata_dir, file)
-        xml_file = ET.parse(path)
-        root_element = ET.QName(xml_file.find('.'))
-        root_element_ln = root_element.localname
-        #root_element_ns = root_element.namespace
-        elements_list = []
-        for elem in xml_file.findall('.//'):
-            elem_path = xml_file.getelementpath(elem)
-            elem = ET.QName(elem)
-            elem_ln = elem.localname
-            elem_ns = elem.namespace
-            elem_lnpath = elem_path.replace(f"{{{elem_ns}}}", root_element_ln + ":")
-            elements_list.append({"Name": root_element_ln + ":" + elem_ln, "Namespace": elem_ns, "Path": elem_lnpath})
-
-        """
-        Compares the column headers in the Spreadsheet against the headers. Filters out non-matching data.
-        """
+        try:
+            self.xml_files = []
+            for file in os.scandir(self.metadata_dir):
                 list_xml = []
-
-
-
-
-
+                if file.name.endswith('xml'):
+                    """
+                    Generates info on the elements of the XML Files placed in the Metadata directory.
+                    Composed as a list of dictionaries.
+                    """
+                    path = os.path.join(self.metadata_dir, file.name)
+                    try:
+                        xml_file = ET.parse(path)
+                    except ET.XMLSyntaxError as e:
+                        logger.exception(f'XML Syntax Error parsing file {file.name}: {e}')
+                        raise
+                    except FileNotFoundError as e:
+                        logger.exception(f'XML file not found {file.name}: {e}')
+                        raise
+                    root_element = ET.QName(xml_file.find('.'))
+                    root_element_ln = root_element.localname
+                    #root_element_ns = root_element.namespace
+                    elements_list = []
+                    for elem in xml_file.findall('.//'):
+                        elem_path = xml_file.getelementpath(elem)
+                        elem = ET.QName(elem)
+                        elem_ln = elem.localname
+                        elem_ns = elem.namespace
+                        elem_lnpath = elem_path.replace(f"{{{elem_ns}}}", root_element_ln + ":")
+                        elements_list.append({"Name": root_element_ln + ":" + elem_ln, "Namespace": elem_ns, "Path": elem_lnpath})
 
+                    """
+                    Compares the column headers in the Spreadsheet against the headers. Filters out non-matching data.
+                    """
+                    try:
+                        for elem_dict in elements_list:
+                            if elem_dict.get('Name') in self.column_headers or elem_dict.get('Path') in self.column_headers:
+                                list_xml.append({"Name": elem_dict.get('Name'), "Namespace": elem_dict.get('Namespace'), "Path": elem_dict.get('Path')})
+                    except Exception as e:
+                        logger.exception(f'Failed comparing Column headers in XML: {e}')
+                        raise
+                    if len(list_xml) != 0:
+                        self.xml_files.append({'data': list_xml, 'localname': root_element_ln, 'xmlfile': path})
+                        logger.debug(f'XML file: {file.name} with matching columns added for descriptive metadata.')
+                    else:
+                        logger.warning(f'No matching columns found in XML file: {file.name}, skipping.')
+            return self.xml_files
+        except FileNotFoundError as e:
+            logger.exception(f'Metadata directory not found: {e}')
+            raise
+        except Exception as e:
+            logger.exception(f'Failed to intialise XML Metadata: {e}')
+            raise
     def generate_descriptive_metadata(self, xml_desc_elem: ET.Element, idx: pd.Index) -> None:
         """
         Composes the data into an xml file.
         """
-
-
-
-
-
-        else:
-            if idx.empty:
+        try:
+            for xml_file in self.xml_files:
+                list_xml = xml_file.get('data')
+                localname = xml_file.get('localname')
+                if len(list_xml) == 0 or idx.empty:
                     pass
                 else:
                     xml_new = ET.parse(xml_file.get('xmlfile'))
@@ -429,40 +609,44 @@ class OpexManifestGenerator():
                         name = elem_dict.get('Name')
                         path = elem_dict.get('Path')
                         ns = elem_dict.get('Namespace')
-
-
-
-
-
-
-
-
+                        if self.metadata_flag in {'e', 'exact'}:
+                            val_series = self.df.loc[idx,path]
+                            val = check_nan(val_series.item())
+                        elif self.metadata_flag in {'f', 'flat'}:
+                            val_series = self.df.loc[idx,name]
+                            val = check_nan(val_series.item())
+                        if val is None:
+                            continue
+                        else:
+                            if is_datetime64_any_dtype(val_series):
+                                val = pd.to_datetime(val)
+                                val = datetime.strftime(val, "%Y-%m-%dT%H:%M:%S.000Z")
+                            if self.metadata_flag in {'e','exact'}:
+                                n = path.replace(localname + ":", f"{{{ns}}}")
+                                elem = xml_new.find(f'./{n}')
+                                if elem is None:
+                                    logger.warning(f'XML element not found for path: {n} in {xml_file.get("xmlfile")}')
+                                    continue
+                            elif self.metadata_flag in {'f', 'flat'}:
+                                n = name.split(':')[-1]
+                                elem = xml_new.find(f'.//{{{ns}}}{n}')
+                                if elem is None:
+                                    logger.warning(f'XML element not found for name: {name} in {xml_file.get("xmlfile")}')
                                     continue
-
-                        if is_datetime64_any_dtype(val_series):
-                            val = pd.to_datetime(val)
-                            val = datetime.datetime.strftime(val, "%Y-%m-%dT%H:%M:%S.000Z")
-                        if self.metadata_flag in {'e','exact'}:
-                            n = path.replace(localname + ":", f"{{{ns}}}")
-                            elem = xml_new.find(f'./{n}')
-                        elif self.metadata_flag in {'f', 'flat'}:
-                            n = name.split(':')[-1]
-                            elem = xml_new.find(f'.//{{{ns}}}{n}')
-                        elem.text = str(val)
-        except KeyError as e:
-            print('Key Error: please ensure column header\'s are an exact match...')
-            print(f'Missing Column: {e}')
-            print('Alternatively use flat mode...')
-            time.sleep(3)
-            raise SystemExit()
-        except IndexError as e:
-            print("""Index Error; it is likely you have removed or added a file/folder to the directory \
-after generating the spreadsheet. An opex will still be generated but with no xml metadata. \
-To ensure metadata match up please regenerate the spreadsheet...""")
-            print(f'Error: {e}')
-            time.sleep(5)
-            break
+                            elem.text = str(val)
                 xml_desc_elem.append(xml_new.find('.'))
+        except KeyError as e:
+            logger.exception(f'Key Error in XML Lookup: {e}' \
+                '\n please ensure column header\'s are an exact match.')
+            raise
+        except IndexError as e:
+            logger.warning(f'Index Error: {e}' \
+                '\nIt is likely you have removed or added a file/folder to the directory' \
+                'after generating your input spreadsheet. An opex will still be generated but with no xml metadata.' \
+                '\nTo ensure metadata match up please regenerate the spreadsheet.')
+        except Exception as e:
+            logger.exception(f'General Error in XML Lookup: {e}')
+            raise
 
     def generate_opex_properties(self, xmlroot: ET.Element, idx: int, title: str = None,
                                  description: str = None, security: str = None) -> None:
@@ -479,61 +663,93 @@ class OpexManifestGenerator():
         if self.autoref_flag not in {"generic", "g"} or self.input:
             self.identifiers = ET.SubElement(self.properties, f"{{{self.opexns}}}Identifiers")
             self.ident_df_lookup(idx)
-        if
+        # remove Properties element if no children were added
+        if len(self.properties) == 0:
             xmlroot.remove(self.properties)
 
-    def generate_opex_fixity(self, file_path: str) ->
-        for
+    def generate_opex_fixity(self, file_path: str, algorithm: list | None = None) -> list:
+        """Generate fixities for a file. If algorithm is None, defaults to ['SHA-1']."""
+        algorithm = algorithm or ['SHA-1']
+        list_fixity = []
+        for algorithm_type in algorithm:
             self.fixity = ET.SubElement(self.fixities, f"{{{self.opexns}}}Fixity")
-
+            hash_value = HashGenerator(algorithm = algorithm_type).hash_generator(file_path)
             self.fixity.set("type", algorithm_type)
-            self.fixity.set("value",
-
-
-
-    def
-        for
-
-
-
-
-
-
-
-
-
-
+            self.fixity.set("value", hash_value)
+            list_fixity.append([algorithm_type, hash_value, file_path])
+        return list_fixity
+
+    def generate_pax_folder_opex_fixity(self, folder_path: str, fixitiesxml: ET._Element, filesxml: ET._Element, algorithm: list | None = None) -> list:
+        """Generate fixities for files inside a pax folder. If algorithm is None, defaults to ['SHA-1']."""
+        algorithm = algorithm or ['SHA-1']
+        list_fixity = []
+        list_path = []
+        for dir,_,files in os.walk(folder_path):
+            for filename in files:
+                rel_path = os.path.relpath(dir,folder_path)
+                rel_file = os.path.join(rel_path, filename).replace('\\','/')
+                abs_file = os.path.abspath(os.path.join(dir,filename))
+                list_path.append(abs_file)
+                for algorithm_type in algorithm:
+                    self.fixity = ET.SubElement(fixitiesxml, f"{{{self.opexns}}}Fixity")
+                    hash_value = HashGenerator(algorithm = algorithm_type).hash_generator(abs_file)
+                    self.fixity.set("type", algorithm_type)
+                    self.fixity.set("value", hash_value)
+                    self.fixity.set("path", rel_file)
+                    list_fixity.append([algorithm_type, hash_value, abs_file])
+                file = ET.SubElement(filesxml, f"{{{self.opexns}}}File")
+                file.set("type", "content")
+                file.set("size", str(os.path.getsize(abs_file)))
+                file.text = str(rel_file)
+        return list_fixity, list_path
 
+
+    def generate_pax_zip_opex_fixity(self, file_path: str, algorithm: list | None = None) -> list:
+        """Generate fixities for files inside a pax/zip. If algorithm is None, defaults to ['SHA-1']."""
+        algorithm = algorithm or ['SHA-1']
+        list_fixity = []
+        for algorithm_type in algorithm:
+            with zipfile.ZipFile(file_path, 'r') as z:
+                for file in z.filelist:
+                    self.fixity = ET.SubElement(self.fixities, f"{{{self.opexns}}}Fixity")
+                    hash_value = HashGenerator(algorithm = algorithm_type).hash_generator_pax_zip(file.filename, z)
+                    file_replace = file.filename.replace('\\', '/')
+                    self.fixity.set("path", file_replace)
+                    self.fixity.set("type", algorithm_type)
+                    self.fixity.set("value", hash_value)
+                    list_fixity.append([algorithm_type, hash_value, f"{file_path}/{file.filename}"])
+        return list_fixity
+
     def main(self) -> None:
-        if self.print_xmls_flag:
-            self.print_descriptive_xmls()
-            input("Press Key to Close")
-            raise SystemExit()
-        print(f"Start time: {self.start_time}")
         if self.clear_opex_flag:
             self.clear_opex()
-            if self.autoref_flag or self.algorithm or self.input:
+            if self.autoref_flag or self.algorithm or self.input or self.zip_flag or self.export_flag or self.empty_flag or self.removal_flag:
                 pass
             else:
-
-                print('Cleared OPEXES. No additional arguments passed, so ending program.'); time.sleep(3)
+                logger.info('Cleared Opexes. No additional arguments passed, so ending program.')
                 raise SystemExit()
         if self.empty_flag:
-
+            logger.debug('Removing empty directories as per empty flag.')
+            ReferenceGenerator(self.root, self.output_path, meta_dir_flag = self.meta_dir_flag).remove_empty_directories(self.empty_export_flag)
+        df_flag = False
         if not self.autoref_flag in {"g", "generic"}:
-
+            logger.debug('Auto Reference flag not set to generic, checking for Dataframe requirement.')
+            df_flag = self.init_df()
         self.count = 1
-        if
+        if self.metadata_flag is not None:
+            if not df_flag:
+                logger.error('Metadata generation requires Auto Reference or Input file to be specified.')
+                raise ValueError('Metadata generation requires Auto Reference or Input file to be specified.')
             self.init_generate_descriptive_metadata()
         OpexDir(self, self.root).generate_opex_dirs(self.root)
         if self.algorithm:
-            output_path = define_output_file(self.output_path, self.root, METAFOLDER, self.meta_dir_flag, output_suffix = FIXITY_SUFFIX, output_format = "txt")
+            output_path = define_output_file(self.output_path, self.root, self.METAFOLDER, self.meta_dir_flag, output_suffix = self.FIXITY_SUFFIX, output_format = "txt")
             if self.fixity_export_flag:
                 export_list_txt(self.list_fixity, output_path)
         if self.removal_flag:
-            output_path = define_output_file(self.output_path, self.root,
-
-
+            output_path = define_output_file(self.output_path, self.root, self.METAFOLDER, self.meta_dir_flag, output_suffix = self.REMOVALS_SUFFIX, output_format = "txt")
+            if self.removal_export_flag:
+                export_list_txt(self.removal_list, output_path)
 
 class OpexDir(OpexManifestGenerator):
     def __init__(self, OMG: OpexManifestGenerator, folder_path: str, title: str = None, description: str = None, security: str = None) -> None:
@@ -562,10 +778,12 @@ class OpexDir(OpexManifestGenerator):
         if self.OMG.ignore_flag:
             self.ignore = self.OMG.ignore_df_lookup(index)
             if self.ignore:
+                logger.info(f'Ignoring folder as per ignore flag in spreadsheet: {self.folder_path}')
                 return
         if self.OMG.removal_flag:
             self.removal = self.OMG.removal_df_lookup(index)
             if self.removal:
+                logger.info(f'Removing folder as per removal flag in spreadsheet: {self.folder_path}')
                 remove_tree(self.folder_path, self.OMG.removal_list)
                 return
         self.xmlroot = ET.Element(f"{{{self.opexns}}}OPEXMetadata", nsmap={"opex":self.opexns})
@@ -587,35 +805,27 @@ class OpexDir(OpexManifestGenerator):
             if security is not None:
                 self.security = security
             else:
-                self.security = GENERIC_DEFAULT_SECURITY
+                self.security = self.GENERIC_DEFAULT_SECURITY
         else:
             self.title = title
             self.description = description
             self.security = security
         if self.OMG.sourceid_flag:
-            self.OMG.sourceid_df_lookup(self.transfer,
+            self.OMG.sourceid_df_lookup(self.transfer, index)
+        # Handling Fixities for PAX Folders
         if self.OMG.algorithm and self.OMG.pax_fixity_flag is True and self.folder_path.endswith(".pax"):
             self.fixities = ET.SubElement(self.transfer, f"{{{self.opexns}}}Fixities")
-
-
-
-                    rel_file = os.path.join(rel_path, filename)
-                    rel_file = rel_file.replace('\\','/')
-                    abs_file = os.path.abspath(os.path.join(dir,filename))
-                    self.generate_opex_fixity(abs_file)
-                    self.fixity.set("path",rel_file)
-                    file = ET.SubElement(self.files, f"{{{self.opexns}}}File")
-                    file.set("type", "content")
-                    file.set("size", str(os.path.getsize(abs_file)))
-                    file.text = str(rel_file)
+            tmp_list_fixity,tmp_list_path = self.OMG.generate_pax_folder_opex_fixity(self.folder_path, self.fixities, self.files, self.OMG.algorithm)
+            self.OMG.list_fixity.extend(tmp_list_fixity)
+            self.OMG.list_path.extend(tmp_list_path)
         if self.OMG.autoref_flag or self.OMG.input:
             self.OMG.generate_opex_properties(self.xmlroot, index,
                                               title = self.title,
                                               description = self.description,
                                               security = self.security)
-        if
+        if self.OMG.metadata_flag is not None:
             self.xml_descmeta = ET.SubElement(self.xmlroot,f"{{{self.opexns}}}DescriptiveMetadata")
-            self.OMG.generate_descriptive_metadata(self.
+            self.OMG.generate_descriptive_metadata(self.xml_descmeta, idx = index)
 
     def filter_directories(self, directory: str, sort_key: str = str.casefold) -> list:
         try:
@@ -623,82 +833,85 @@ class OpexDir(OpexManifestGenerator):
                 list_directories = sorted([win_256_check(os.path.join(directory, f.name)) for f in os.scandir(directory)
                                            if not f.name.startswith('.')
                                            and filter_win_hidden(win_256_check(os.path.join(directory, f.name))) is False
-                                           and f.name
+                                           and not f.name in ('opex_generate.exe','opex_generate.bin')
+                                           and f.name != self.OMG.METAFOLDER
                                            and f.name != os.path.basename(__file__)],
                                           key=sort_key)
             elif self.OMG.hidden_flag is True:
-                list_directories = sorted([os.path.join(directory, f.name) for f in os.scandir(directory) \
-                                           if f.name != METAFOLDER
+                list_directories = sorted([win_256_check(os.path.join(directory, f.name)) for f in os.scandir(directory) \
+                                           if f.name != self.OMG.METAFOLDER
+                                           and not f.name in ('opex_generate.exe','opex_generate.bin')
                                            and f.name != os.path.basename(__file__)],
                                           key=sort_key)
             return list_directories
         except Exception as e:
-
-
-            raise SystemError()
+            logger.exception(f'Failed to Filter Directories: {e}')
+            raise
 
     def generate_opex_dirs(self, path: str) -> None:
         """"
        This function loops recursively through a given directory.
 
-        There are two loops to first generate Opexes for Files;
+        There are two loops to first generate Opexes for Files; Then Generate the Folder Opex Manifests.
         """
-
-        if
-            opex_path = os.path.abspath(
+        current = OpexDir(self.OMG, path)
+        if current.OMG.algorithm and current.OMG.pax_fixity_flag is True and current.folder_path.endswith(".pax"):
+            opex_path = os.path.abspath(current.folder_path)
         else:
-            opex_path = os.path.join(os.path.abspath(
+            opex_path = os.path.join(os.path.abspath(current.folder_path), os.path.basename(current.folder_path))
         #First Loop to Generate Folder Manifest Opexes & Individual File Opexes.
-        if
+        if current.removal is True:
             #If removal is True for Folder, then it will be removed - Does not need to descend.
             pass
         else:
-            for f_path in
+            for f_path in current.filter_directories(path):
                 if f_path.endswith('.opex'):
                     #Ignores OPEX files / directories...
                     pass
                 elif os.path.isdir(f_path):
-                    if
-                    (
-
+                    if current.ignore is True or \
+                        (current.OMG.removal_flag is True and \
+                        current.OMG.removal_df_lookup(current.OMG.index_df_lookup(f_path)) is True):
                         #If Ignore is True, or the Folder below is marked for Removal: Don't add to Opex
                         pass
                     else:
                         #Add Folder to OPEX Manifest (doesn't get written yet...)
-
-
-                        if
-                            #If using fixity, but the folder is a PAX & using PAX Fixity: End descent.
+                        current.folder = ET.SubElement(self.folders, f"{{{self.opexns}}}Folder")
+                        current.folder.text = str(os.path.basename(f_path))
+                        if current.OMG.algorithm and current.OMG.pax_fixity_flag is True and current.folder_path.endswith(".pax"):
+                            #If using fixity, but the current folder is a PAX & using PAX Fixity: End descent.
                            pass
                         else:
                             #Recurse Descent.
-
+                            current.generate_opex_dirs(f_path)
                 elif os.path.isfile(f_path):
                     #Processes OPEXes for individual Files: this gets written.
-                    OpexFile(
+                    OpexFile(current.OMG, f_path)
                 else:
-
+                    logger.warning(f'Unknown File Type at: {f_path}')
                     pass
         #Second Loop to add previously generated Opexes to Folder Manifest.
-        if
+        if current.removal is True or current.ignore is True:
+            logger.debug(f'Skipping Opex generation for: {current.folder_path}')
            pass
        else:
             if check_opex(opex_path):
                 #Only processing Opexes.
-                for f_path in
+                for f_path in current.filter_directories(path):
                     if os.path.isfile(f_path):
-                        file = ET.SubElement(
+                        file = ET.SubElement(current.files, f"{{{current.opexns}}}File")
                         if f_path.endswith('.opex'):
                             file.set("type", "metadata")
                         else:
                             file.set("type", "content")
                         file.set("size", str(os.path.getsize(f_path)))
                         file.text = str(os.path.basename(f_path))
+                        logger.debug(f'Adding File to Opex Manifest: {f_path}')
                 #Writes Folder OPEX
-                write_opex(opex_path,
+                write_opex(opex_path, current.xmlroot)
             else:
                 #Avoids Override if exists, lets you continue where left off.
-
+                logger.info(f"Avoiding override, Opex exists at: {opex_path}")
 
 class OpexFile(OpexManifestGenerator):
     def __init__(self, OMG: OpexManifestGenerator, file_path: str, title: str = None, description: str = None, security: str = None) -> None:
@@ -709,6 +922,7 @@ class OpexFile(OpexManifestGenerator):
         else:
             self.file_path = file_path
             if check_opex(self.file_path):
+                index = None
                 if any([self.OMG.input,
                         self.OMG.autoref_flag in {"c","catalog","a","accession","b","both","cg","catalog-generic","ag","accession-generic","bg","both-generic"},
                         self.OMG.ignore_flag,
@@ -720,8 +934,6 @@ class OpexFile(OpexManifestGenerator):
                    index = self.OMG.index_df_lookup(self.file_path)
                elif self.OMG.autoref_flag is None or self.OMG.autoref_flag in {"g","generic"}:
                    index = None
-                else:
-                    index = None
                self.ignore = False
                self.removal = False
                if self.OMG.ignore_flag:
@@ -746,25 +958,28 @@ class OpexFile(OpexManifestGenerator):
                    if security is not None:
                        self.security = security
                    else:
-                        self.security = GENERIC_DEFAULT_SECURITY
+                        self.security = self.GENERIC_DEFAULT_SECURITY
                else:
                    self.title = title
                    self.description = description
                    self.security = security
+                opex_path = None
                if self.OMG.algorithm or self.OMG.autoref_flag or self.OMG.input:
                    self.xmlroot = ET.Element(f"{{{self.opexns}}}OPEXMetadata", nsmap={"opex":self.opexns})
                    self.transfer = ET.SubElement(self.xmlroot, f"{{{self.opexns}}}Transfer")
                    if self.OMG.sourceid_flag:
-                        self.OMG.sourceid_df_lookup(self.transfer,
+                        self.OMG.sourceid_df_lookup(self.transfer, index)
                    if self.OMG.algorithm:
                        self.fixities = ET.SubElement(self.transfer, f"{{{self.opexns}}}Fixities")
                        if self.OMG.hash_from_spread:
                            self.OMG.hash_df_lookup(self.fixities, index)
                        else:
+                            self.OMG.list_path.append(self.file_path)
                            if self.OMG.pax_fixity_flag is True and (self.file_path.endswith("pax.zip") or self.file_path.endswith(".pax")):
-                                self.generate_pax_zip_opex_fixity(self.file_path)
+                                tmp_list_fixity = self.generate_pax_zip_opex_fixity(self.file_path, self.OMG.algorithm)
                            else:
-                                self.generate_opex_fixity(self.file_path)
+                                tmp_list_fixity = self.generate_opex_fixity(self.file_path, self.OMG.algorithm)
+                            self.OMG.list_fixity.extend(tmp_list_fixity)
                        if self.transfer is None:
                            self.xmlroot.remove(self.transfer)
                        if self.OMG.autoref_flag or self.OMG.input:
@@ -772,11 +987,18 @@ class OpexFile(OpexManifestGenerator):
                                                             title = self.title,
                                                             description = self.description,
                                                             security = self.security)
-                    if
+                    if self.OMG.metadata_flag is not None:
                        self.xml_descmeta = ET.SubElement(self.xmlroot, f"{{{self.opexns}}}DescriptiveMetadata")
                        self.OMG.generate_descriptive_metadata(self.xml_descmeta, index)
                opex_path = write_opex(self.file_path, self.xmlroot)
-
-
+                # Zip cannot be activated unless another flag - which
+                if self.OMG.zip_flag:
+                    zip_opex(self.file_path, opex_path)
+                    if self.OMG.zip_file_removal:
+                        os.remove(self.file_path)
+                        if os.path.exists(opex_path):
+                            os.remove(opex_path)
+                            logger.debug(f'Removed file: {opex_path}')
+                        logger.debug(f'Removed file: {self.file_path}')
            else:
-
+                logger.info(f"Avoiding override, Opex exists at: {self.file_path}: ")