opex-manifest-generator 1.1.9__py3-none-any.whl → 1.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opex_manifest_generator/cli.py +63 -22
- opex_manifest_generator/common.py +38 -2
- opex_manifest_generator/opex_manifest.py +230 -249
- opex_manifest_generator/options.properties +13 -0
- {opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/METADATA +1 -1
- {opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/RECORD +10 -10
- {opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/WHEEL +1 -1
- opex_manifest_generator/test_cli.py +0 -2
- {opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/LICENSE.md +0 -0
- {opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/entry_points.txt +0 -0
- {opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/top_level.txt +0 -0
opex_manifest_generator/cli.py
CHANGED
|
@@ -13,24 +13,66 @@ import importlib.metadata
|
|
|
13
13
|
|
|
14
14
|
def parse_args():
|
|
15
15
|
parser = argparse.ArgumentParser(description = "OPEX Manifest Generator for Preservica Uploads")
|
|
16
|
-
parser.add_argument('root', default = os.getcwd())
|
|
17
|
-
parser.add_argument("-c", "--autoclass", required = False,
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
16
|
+
parser.add_argument('root', default = os.getcwd(), help = "The root path to generate Opexes for")
|
|
17
|
+
parser.add_argument("-c", "--autoclass", required = False,
|
|
18
|
+
choices = ['catalog', 'c', 'accession', 'a', 'both', 'b', 'generic', 'g', 'catalog-generic', 'cg', "accession-generic", "ag", "both-generic", "bg"],
|
|
19
|
+
type = str.lower,
|
|
20
|
+
help="""Toggles whether to utilise the auto_classification_generator
|
|
21
|
+
to generate an on the fly Reference listing.
|
|
22
|
+
|
|
23
|
+
There are several options, {catalog} will generate
|
|
24
|
+
a Archival Reference following an ISAD(G) sturcutre.
|
|
25
|
+
{accession} will create a running number of files.
|
|
26
|
+
{both} will do both at the same time!
|
|
27
|
+
{generic} will populate the title and description fields with the folder/file's name,
|
|
28
|
+
if used in conjunction with one of the above options:
|
|
29
|
+
{generic-catalog,generic-accession, generic-both} it will do both simultaneously.
|
|
30
|
+
""")
|
|
31
|
+
parser.add_argument("-p", "--prefix", required = False, nargs = '+',
|
|
32
|
+
help= """Assign a prefix when utilising the --autoclass option. Prefix will append any text before all generated text.
|
|
33
|
+
When utilising the {both} option fill in like: [catalog-prefix, accession-prefix] without square brackets.
|
|
34
|
+
""")
|
|
35
|
+
parser.add_argument("-fx", "--fixity", required = False, const = "SHA-1", default = None,
|
|
36
|
+
nargs = '?', choices = ['NONE', 'SHA-1', 'MD5', 'SHA-256', 'SHA-512'], type = str.upper,
|
|
37
|
+
help="Generates a hash for each file and adds it to the opex, can select the algorithm to utilise.")
|
|
38
|
+
parser.add_argument("-rme", "--remove-empty", required = False, action = 'store_true', default = False,
|
|
39
|
+
help = "Remove and log empty directories from root. Log will be exported to 'meta' / output folder.")
|
|
40
|
+
parser.add_argument("-o", "--output", required = False, nargs = 1,
|
|
41
|
+
help = "Sets the output to send any generated files to. Will not affect creation of a meta dir.")
|
|
42
|
+
parser.add_argument("-dmd", "--disable-meta-dir", required = False, action = 'store_false',
|
|
43
|
+
help = """Set whether to disable the creation of a 'meta' directory for generated files,
|
|
44
|
+
default behaviour is to always generate this directory""")
|
|
45
|
+
parser.add_argument("-clr", "--clear-opex", required = False, action = 'store_true', default = False,
|
|
46
|
+
help = """Clears existing opex files from a directory. If set with no further options will only clear opexes;
|
|
47
|
+
if multiple options are set will clear opexes and then run the program""")
|
|
48
|
+
parser.add_argument("-opt","--options-file", required = False, default=os.path.join(os.path.dirname(__file__),'options.properties'),
|
|
49
|
+
help="Specify a custom Options file, changing the set presets for column headers (Title,Description,etc)")
|
|
50
|
+
parser.add_argument("-s", "--start-ref", required = False, nargs = '?', default = 1,
|
|
51
|
+
help="Set a custom Starting reference for the Auto Classification generator. The generated reference will")
|
|
52
|
+
parser.add_argument("-mdir","--metadata-dir", required=False, nargs= '?',
|
|
53
|
+
default = os.path.join(os.path.dirname(os.path.realpath(__file__)), "metadata"),
|
|
54
|
+
help="Specify the metadata directory to pull XML files from")
|
|
55
|
+
parser.add_argument("-m", "--metadata", required = False, const = 'e', default = 'none',
|
|
56
|
+
nargs = '?', choices = ['none', 'n', 'exact', 'e', 'flat', 'f'], type = str.lower,
|
|
57
|
+
help="Set whether to include xml metadata fields in the generation of the Opex")
|
|
58
|
+
parser.add_argument("-ex", "--export", required = False, action = 'store_true', default = False,
|
|
59
|
+
help="Set whether to export the generated auto classification references to an AutoClass spreadsheet")
|
|
60
|
+
parser.add_argument("-i", "--input", required = False, nargs='?',
|
|
61
|
+
help="Set to utilise a CSV / XLSX spreadsheet to import data from")
|
|
62
|
+
parser.add_argument("-rm", "--remove", required = False, action = "store_true", default = False,
|
|
63
|
+
help="Set whether to enable removals of files and folders from a directory. ***Currently in testing")
|
|
64
|
+
parser.add_argument("-z", "--zip", required = False, action = 'store_true',
|
|
65
|
+
help="Set to zip files")
|
|
66
|
+
parser.add_argument("-fmt", "--output-format", required = False, default = "xlsx", choices = ['xlsx', 'csv'],
|
|
67
|
+
help="Set whether to output to an xlsx or csv format")
|
|
31
68
|
parser.add_argument("-v", "--version", action = 'version', version = '%(prog)s {version}'.format(version = importlib.metadata.version("opex_manifest_generator")))
|
|
32
|
-
parser.add_argument("--
|
|
33
|
-
|
|
69
|
+
parser.add_argument("--accession-mode", required=False, choices=["file",'directory','both'],
|
|
70
|
+
help="""Set the mode when utilising the Accession option in autoclass.
|
|
71
|
+
file - only adds on files, folder - only adds on folders, both - adds on files and folders""")
|
|
72
|
+
parser.add_argument("--hidden", required = False, action = 'store_true', default = False,
|
|
73
|
+
help="Set whether to include hidden files and folders")
|
|
74
|
+
parser.add_argument("--print-xmls", required = False, action = "store_true", default = False,
|
|
75
|
+
help="Prints the elements from your xmls to the consoles")
|
|
34
76
|
args = parser.parse_args()
|
|
35
77
|
return args
|
|
36
78
|
|
|
@@ -51,11 +93,6 @@ def run_cli():
|
|
|
51
93
|
time.sleep(5)
|
|
52
94
|
if args.print_xmls:
|
|
53
95
|
OpexManifestGenerator.print_descriptive_xmls()
|
|
54
|
-
if args.autoclass:
|
|
55
|
-
pass
|
|
56
|
-
# if not args.prefix:
|
|
57
|
-
# print('A prefix must be set when using Auto-Classification, stopping operation')
|
|
58
|
-
# time.sleep(3); raise SystemExit()
|
|
59
96
|
acc_prefix = None
|
|
60
97
|
if args.prefix:
|
|
61
98
|
if args.autoclass in {"both", "b", "both-generic", "bg"}:
|
|
@@ -113,7 +150,11 @@ def run_cli():
|
|
|
113
150
|
export_flag = args.export,
|
|
114
151
|
meta_dir_flag = args.disable_meta_dir,
|
|
115
152
|
metadata_flag = args.metadata,
|
|
153
|
+
metadata_dir = args.metadata_dir,
|
|
116
154
|
hidden_flag= args.hidden,
|
|
117
155
|
zip_flag = args.zip,
|
|
118
156
|
input = args.input,
|
|
119
157
|
output_format = args.output_format).main()
|
|
158
|
+
|
|
159
|
+
if __name__ == "__main__":
|
|
160
|
+
run_cli()
|
|
@@ -5,7 +5,9 @@ author: Christopher Prince
|
|
|
5
5
|
license: Apache License 2.0"
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import zipfile, os, sys
|
|
8
|
+
import zipfile, os, sys, time, stat
|
|
9
|
+
import datetime
|
|
10
|
+
from lxml import etree
|
|
9
11
|
|
|
10
12
|
def zip_opex(file_path,opex_path):
|
|
11
13
|
zip_file = f"{file_path}.zip"
|
|
@@ -19,4 +21,38 @@ def win_256_check(path: str):
|
|
|
19
21
|
if len(path) > 255 and sys.platform == "win32":
|
|
20
22
|
if path.startswith(u'\\\\?\\'): path = path
|
|
21
23
|
else: path = u"\\\\?\\" + path
|
|
22
|
-
return path
|
|
24
|
+
return path
|
|
25
|
+
|
|
26
|
+
def filter_win_hidden(path: str):
|
|
27
|
+
if sys.platform =="win32":
|
|
28
|
+
if bool(os.stat(path).st_file_attribute & stat.FILE_ATTRIBUTE_HIDDEN) is True:
|
|
29
|
+
return True
|
|
30
|
+
else:
|
|
31
|
+
return False
|
|
32
|
+
else:
|
|
33
|
+
return False
|
|
34
|
+
|
|
35
|
+
def check_nan(value):
|
|
36
|
+
if str(value).lower() in {"nan","nat"}:
|
|
37
|
+
value = None
|
|
38
|
+
return value
|
|
39
|
+
|
|
40
|
+
def check_opex(opex_path:str):
|
|
41
|
+
opex_path = opex_path + ".opex"
|
|
42
|
+
if os.path.exists(win_256_check(opex_path)):
|
|
43
|
+
return False
|
|
44
|
+
else:
|
|
45
|
+
return True
|
|
46
|
+
|
|
47
|
+
def write_opex(path: str, opexxml: etree.Element):
|
|
48
|
+
opex_path = win_256_check(str(path) + ".opex")
|
|
49
|
+
opex = etree.indent(opexxml, " ")
|
|
50
|
+
opex = etree.tostring(opexxml, pretty_print=True, xml_declaration=True, encoding="UTF-8", standalone=True)
|
|
51
|
+
with open(f'{opex_path}', 'w', encoding="UTF-8") as writer:
|
|
52
|
+
writer.write(opex.decode('UTF-8'))
|
|
53
|
+
print('Saved Opex File to: ' + opex_path)
|
|
54
|
+
return opex_path
|
|
55
|
+
|
|
56
|
+
def print_running_time(start_time):
|
|
57
|
+
print(f'Running time: {datetime.datetime.now() - start_time}')
|
|
58
|
+
time.sleep(5)
|
|
@@ -8,24 +8,22 @@ license: Apache License 2.0"
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import lxml.etree as ET
|
|
11
|
-
import os
|
|
11
|
+
import os, time, shutil
|
|
12
12
|
from auto_classification_generator import ClassificationGenerator
|
|
13
13
|
from auto_classification_generator.common import export_list_txt, export_xl, export_csv, define_output_file
|
|
14
|
-
|
|
15
|
-
import time
|
|
14
|
+
import datetime
|
|
16
15
|
import pandas as pd
|
|
17
16
|
from pandas.api.types import is_datetime64_any_dtype
|
|
18
17
|
from opex_manifest_generator.hash import HashGenerator
|
|
19
18
|
from opex_manifest_generator.common import *
|
|
20
|
-
import
|
|
21
|
-
import shutil
|
|
19
|
+
import configparser
|
|
22
20
|
|
|
23
21
|
class OpexManifestGenerator():
|
|
24
22
|
def __init__(self,
|
|
25
23
|
root: str,
|
|
26
24
|
output_path: os.path = os.getcwd(),
|
|
27
25
|
meta_dir_flag: bool = True,
|
|
28
|
-
metadata_dir:
|
|
26
|
+
metadata_dir: os = os.path.join(os.path.dirname(os.path.realpath(__file__)), "metadata"),
|
|
29
27
|
metadata_flag: str = 'none',
|
|
30
28
|
autoclass_flag: str = None,
|
|
31
29
|
prefix: str = None,
|
|
@@ -40,13 +38,14 @@ class OpexManifestGenerator():
|
|
|
40
38
|
zip_flag: bool = False,
|
|
41
39
|
hidden_flag: bool = False,
|
|
42
40
|
output_format: str = "xlsx",
|
|
43
|
-
print_xmls_flag: bool = False
|
|
41
|
+
print_xmls_flag: bool = False,
|
|
42
|
+
options_file: str = os.path.join(os.path.dirname(__file__),'options.properties')):
|
|
44
43
|
|
|
45
44
|
self.root = os.path.abspath(root)
|
|
46
45
|
self.opexns = "http://www.openpreservationexchange.org/opex/v1.2"
|
|
47
46
|
self.list_path = []
|
|
48
47
|
self.list_fixity = []
|
|
49
|
-
self.start_time = datetime.now()
|
|
48
|
+
self.start_time = datetime.datetime.now()
|
|
50
49
|
self.algorithm = algorithm
|
|
51
50
|
self.empty_flag = empty_flag
|
|
52
51
|
self.remove_flag = remove_flag
|
|
@@ -59,52 +58,131 @@ class OpexManifestGenerator():
|
|
|
59
58
|
self.prefix = prefix
|
|
60
59
|
self.acc_prefix = acc_prefix
|
|
61
60
|
self.input = input
|
|
62
|
-
self.title_flag = False
|
|
63
|
-
self.description_flag = False
|
|
64
|
-
self.security_flag = False
|
|
65
|
-
self.ignore_flag = False
|
|
66
|
-
self.sourceid_flag = False
|
|
67
|
-
self.hash_from_spread = False
|
|
68
61
|
self.hidden_flag = hidden_flag
|
|
69
62
|
self.zip_flag = zip_flag
|
|
70
63
|
self.output_format = output_format
|
|
71
64
|
self.metadata_flag = metadata_flag
|
|
72
65
|
self.metadata_dir = metadata_dir
|
|
73
66
|
self.print_xmls_flag = print_xmls_flag
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
67
|
+
self.parse_config(options_file=os.path.abspath(options_file))
|
|
68
|
+
|
|
69
|
+
self.title_flag = False
|
|
70
|
+
self.description_flag = False
|
|
71
|
+
self.security_flag = False
|
|
72
|
+
self.ignore_flag = False
|
|
73
|
+
self.sourceid_flag = False
|
|
74
|
+
self.hash_from_spread = False
|
|
75
|
+
|
|
76
|
+
def parse_config(self, options_file: str = 'options.properties'):
|
|
77
|
+
config = configparser.ConfigParser()
|
|
78
|
+
config.read(options_file, encoding='utf-8')
|
|
79
|
+
global INDEX_FIELD
|
|
80
|
+
INDEX_FIELD = config['options']['INDEX_FIELD']
|
|
81
|
+
global TITLE_FIELD
|
|
82
|
+
TITLE_FIELD = config['options']['TITLE_FIELD']
|
|
83
|
+
global DESCRIPTION_FIELD
|
|
84
|
+
DESCRIPTION_FIELD = config['options']['DESCRIPTION_FIELD']
|
|
85
|
+
global SECUIRTY_FIELD
|
|
86
|
+
SECUIRTY_FIELD = config['options']['SECUIRTY_FIELD']
|
|
87
|
+
global IDENTIFIER_FIELD
|
|
88
|
+
IDENTIFIER_FIELD = config['options']['IDENTIFIER_FIELD']
|
|
89
|
+
global IDENTIFIER_DEFAULT
|
|
90
|
+
IDENTIFIER_DEFAULT = config['options']['IDENTIFIER_DEFAULT']
|
|
91
|
+
global REMOVAL_FIELD
|
|
92
|
+
REMOVAL_FIELD = config['options']['REMOVAL_FIELD']
|
|
93
|
+
global IGNORE_FIELD
|
|
94
|
+
IGNORE_FIELD = config['options']['IGNORE_FIELD']
|
|
95
|
+
global SOURCEID_FIELD
|
|
96
|
+
SOURCEID_FIELD = config['options']['SOURCEID_FIELD']
|
|
97
|
+
global HASH_FIELD
|
|
98
|
+
HASH_FIELD = config['options']['HASH_FIELD']
|
|
99
|
+
global ALGORITHM_FIELD
|
|
100
|
+
ALGORITHM_FIELD = config['options']['ALGORITHM_FIELD']
|
|
101
|
+
|
|
102
|
+
def print_descriptive_xmls(self):
|
|
103
|
+
for file in os.scandir(self.metadata_dir):
|
|
104
|
+
path = os.path.join(self.metadata_dir, file.name)
|
|
105
|
+
print(path)
|
|
106
|
+
xml_file = ET.parse(path)
|
|
107
|
+
root_element = ET.QName(xml_file.find('.'))
|
|
108
|
+
root_element_ln = root_element.localname
|
|
109
|
+
for elem in xml_file.findall(".//"):
|
|
110
|
+
elem_path = xml_file.getelementpath(elem)
|
|
111
|
+
elem = ET.QName(elem)
|
|
112
|
+
elem_lnpath = elem_path.replace(f"{{{elem.namespace}}}", root_element_ln + ":")
|
|
113
|
+
print(elem_lnpath)
|
|
114
|
+
|
|
115
|
+
def set_input_flags(self):
|
|
116
|
+
if 'Title' in self.column_headers:
|
|
117
|
+
self.title_flag = True
|
|
118
|
+
if 'Description' in self.column_headers:
|
|
119
|
+
self.description_flag = True
|
|
120
|
+
if 'Security' in self.column_headers:
|
|
121
|
+
self.security_flag = True
|
|
122
|
+
if 'SourceID' in self.column_headers:
|
|
123
|
+
self.sourceid_flag = True
|
|
124
|
+
if 'Ignore' in self.column_headers:
|
|
125
|
+
self.ignore_flag = True
|
|
126
|
+
if 'Hash' in self.column_headers and 'Algorithm' in self.column_headers:
|
|
127
|
+
self.hash_from_spread = True
|
|
128
|
+
print("Hash detected in Spreadsheet; taking hashes from spreadsheet")
|
|
129
|
+
time.sleep(3)
|
|
130
|
+
|
|
131
|
+
def init_df(self):
|
|
132
|
+
if self.autoclass_flag:
|
|
133
|
+
if self.autoclass_flag in {"catalog", "c", "catalog-generic", "cg"}:
|
|
134
|
+
ac = ClassificationGenerator(self.root, output_path = self.output_path, prefix = self.prefix, start_ref = self.startref, empty_flag = self.empty_flag, accession_flag = False)
|
|
135
|
+
elif self.autoclass_flag in {"accession", "a", "accession-generic", "ag", "both", "b", "both-generic", "bg"}:
|
|
136
|
+
ac = ClassificationGenerator(self.root, output_path = self.output_path, prefix = self.prefix, accprefix = self.acc_prefix, start_ref = self.startref, empty_flag = self.empty_flag, accession_flag="File")
|
|
137
|
+
self.df = ac.init_dataframe()
|
|
138
|
+
if self.autoclass_flag in {"accession", "a", "accesion-generic", "ag"}:
|
|
139
|
+
self.df = self.df.drop('Archive_Reference', axis=1)
|
|
140
|
+
self.column_headers = self.df.columns.values.tolist()
|
|
141
|
+
self.set_input_flags()
|
|
142
|
+
if self.export_flag:
|
|
143
|
+
output_path = define_output_file(self.output_path, self.root, meta_dir_flag = self.meta_dir_flag, output_format = self.output_format)
|
|
144
|
+
if self.output_format == "xlsx":
|
|
145
|
+
export_xl(self.df, output_path)
|
|
146
|
+
elif self.output_format == "csv":
|
|
147
|
+
export_csv(self.df, output_path)
|
|
148
|
+
elif self.input:
|
|
149
|
+
if self.input.endswith('xlsx'):
|
|
150
|
+
self.df = pd.read_excel(self.input)
|
|
151
|
+
elif self.input.endswith('csv'):
|
|
152
|
+
self.df = pd.read_csv(self.input)
|
|
153
|
+
self.column_headers = self.df.columns.values.tolist()
|
|
154
|
+
self.set_input_flags()
|
|
155
|
+
else:
|
|
156
|
+
self.df = None
|
|
157
|
+
self.column_headers = None
|
|
158
|
+
|
|
159
|
+
def clear_opex(self):
|
|
160
|
+
walk = list(os.walk(self.root))
|
|
161
|
+
for dir, _, files in walk[::-1]:
|
|
162
|
+
for file in files:
|
|
163
|
+
file_path = win_256_check(os.path.join(dir, file))
|
|
164
|
+
if str(file_path).endswith('.opex'):
|
|
165
|
+
os.remove(file_path)
|
|
166
|
+
print(f'Cleared Opex: {file_path}')
|
|
78
167
|
|
|
79
168
|
def index_df_lookup(self, path: str):
|
|
80
|
-
idx = self.df[
|
|
169
|
+
idx = self.df[INDEX_FIELD].index[self.df[INDEX_FIELD] == path]
|
|
81
170
|
return idx
|
|
82
171
|
|
|
83
|
-
def
|
|
172
|
+
def xip_df_lookup(self, idx: pd.Index):
|
|
84
173
|
try:
|
|
174
|
+
title = None
|
|
175
|
+
description = None
|
|
176
|
+
security = None
|
|
85
177
|
if idx.empty:
|
|
86
|
-
|
|
87
|
-
description = None
|
|
88
|
-
security = None
|
|
178
|
+
pass
|
|
89
179
|
else:
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
if self.description_flag:
|
|
97
|
-
description = self.df['Description'].loc[idx].item()
|
|
98
|
-
if str(description).lower() in {"nan","nat"}:
|
|
99
|
-
description = None
|
|
100
|
-
else:
|
|
101
|
-
description = None
|
|
102
|
-
if self.security_flag:
|
|
103
|
-
security = self.df['Security'].loc[idx].item()
|
|
104
|
-
if str(security).lower() in {"nan","nat"}:
|
|
105
|
-
security = None
|
|
106
|
-
else:
|
|
107
|
-
security = None
|
|
180
|
+
if self.title_flag:
|
|
181
|
+
title = check_nan(self.df[TITLE_FIELD].loc[idx].item())
|
|
182
|
+
if self.description_flag:
|
|
183
|
+
description = check_nan(self.df[DESCRIPTION_FIELD].loc[idx].item())
|
|
184
|
+
if self.security_flag:
|
|
185
|
+
security = check_nan(self.df[SECUIRTY_FIELD].loc[idx].item())
|
|
108
186
|
return title,description,security
|
|
109
187
|
except Exception as e:
|
|
110
188
|
print('Error Looking up XIP Metadata')
|
|
@@ -115,10 +193,8 @@ class OpexManifestGenerator():
|
|
|
115
193
|
if idx.empty:
|
|
116
194
|
return False
|
|
117
195
|
else:
|
|
118
|
-
remove = self.df[
|
|
119
|
-
if
|
|
120
|
-
return False
|
|
121
|
-
elif bool(remove):
|
|
196
|
+
remove = check_nan(self.df[REMOVAL_FIELD].loc[idx].item())
|
|
197
|
+
if remove:
|
|
122
198
|
print(f"Removing: {path}")
|
|
123
199
|
# Not functioning correctly
|
|
124
200
|
if os.path.isdir(path):
|
|
@@ -137,15 +213,10 @@ class OpexManifestGenerator():
|
|
|
137
213
|
if idx.empty:
|
|
138
214
|
return False
|
|
139
215
|
else:
|
|
140
|
-
ignore = self.df[
|
|
141
|
-
|
|
142
|
-
return False
|
|
143
|
-
elif str(ignore).lower() in {"true", "1.0"}:
|
|
144
|
-
return True
|
|
145
|
-
elif str(ignore).lower() in {"false", "0.0"}:
|
|
146
|
-
return False
|
|
216
|
+
ignore = check_nan(self.df[IGNORE_FIELD].loc[idx].item())
|
|
217
|
+
return bool(ignore)
|
|
147
218
|
except Exception as e:
|
|
148
|
-
print('Error looking up
|
|
219
|
+
print('Error looking up Ignore')
|
|
149
220
|
print(e)
|
|
150
221
|
|
|
151
222
|
def sourceid_df_lookup(self, xml_element: ET.SubElement, idx: pd.Index):
|
|
@@ -153,149 +224,54 @@ class OpexManifestGenerator():
|
|
|
153
224
|
if idx.empty:
|
|
154
225
|
pass
|
|
155
226
|
else:
|
|
156
|
-
sourceid = self.df[
|
|
157
|
-
if
|
|
158
|
-
pass
|
|
159
|
-
else:
|
|
227
|
+
sourceid = check_nan(self.df[SOURCEID_FIELD].loc[idx].item())
|
|
228
|
+
if sourceid:
|
|
160
229
|
source_xml = ET.SubElement(xml_element,f"{{{self.opexns}}}SourceID")
|
|
161
230
|
source_xml.text = str(sourceid)
|
|
162
231
|
except Exception as e:
|
|
163
232
|
print('Error looking up SourceID')
|
|
164
233
|
print(e)
|
|
165
234
|
|
|
166
|
-
def hash_df_lookup(self,
|
|
235
|
+
def hash_df_lookup(self, xml_fixities: ET.SubElement, idx: pd.Index):
|
|
167
236
|
try:
|
|
168
237
|
if idx.empty:
|
|
169
238
|
pass
|
|
170
239
|
else:
|
|
171
240
|
self.fixity = ET.SubElement(xml_fixities,f"{{{self.opexns}}}Fixity")
|
|
172
|
-
self.hash = self.df[
|
|
173
|
-
self.algorithm = self.df[
|
|
174
|
-
|
|
175
|
-
self.fixity.set(
|
|
176
|
-
self.fixity.set("value",self.hash)
|
|
241
|
+
self.hash = self.df[HASH_FIELD].loc[idx].item()
|
|
242
|
+
self.algorithm = self.df[ALGORITHM_FIELD].loc[idx].item()
|
|
243
|
+
self.fixity.set('type', self.algorithm)
|
|
244
|
+
self.fixity.set('value',self.hash)
|
|
177
245
|
except Exception as e:
|
|
178
|
-
print('Error looking up
|
|
246
|
+
print('Error looking up Hash')
|
|
179
247
|
print(e)
|
|
180
248
|
|
|
181
|
-
def ident_df_lookup(self, idx: pd.Index,
|
|
249
|
+
def ident_df_lookup(self, idx: pd.Index, default_key: str = None):
|
|
182
250
|
try:
|
|
183
251
|
if idx.empty:
|
|
184
|
-
|
|
185
|
-
self.identifier = ET.SubElement(self.identifiers,f"{{{self.opexns}}}Identifier")
|
|
186
|
-
if key_override is None:
|
|
187
|
-
key_name = "code"
|
|
188
|
-
else:
|
|
189
|
-
key_name = key_override
|
|
190
|
-
self.identifier.set("type",key_name)
|
|
191
|
-
self.identifier.text = ident
|
|
252
|
+
pass
|
|
192
253
|
else:
|
|
193
254
|
for header in self.column_headers:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
if '
|
|
197
|
-
key_name = str(header).
|
|
198
|
-
elif
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
key_name = "code"
|
|
255
|
+
ident = None
|
|
256
|
+
if any(s in header for s in {IDENTIFIER_FIELD,'Archive_Reference','Accession_Reference'}):
|
|
257
|
+
if f'{IDENTIFIER_FIELD}:' in header:
|
|
258
|
+
key_name = str(header).split(':',1)[-1]
|
|
259
|
+
elif IDENTIFIER_FIELD in header:
|
|
260
|
+
key_name = IDENTIFIER_DEFAULT
|
|
261
|
+
elif 'Archive_Reference' in header:
|
|
262
|
+
key_name = IDENTIFIER_DEFAULT
|
|
263
|
+
elif 'Accession_Reference' in header:
|
|
264
|
+
key_name = "accref"
|
|
205
265
|
else:
|
|
206
|
-
key_name =
|
|
207
|
-
|
|
208
|
-
ident
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
self.identifier = ET.SubElement(self.identifiers, f"{{{self.opexns}}}Identifier")
|
|
213
|
-
self.identifier.set("type", key_name)
|
|
214
|
-
self.identifier.text = str(ident)
|
|
266
|
+
key_name = IDENTIFIER_DEFAULT
|
|
267
|
+
ident = check_nan(self.df[header].loc[idx].item())
|
|
268
|
+
if ident:
|
|
269
|
+
self.identifier = ET.SubElement(self.identifiers, f"{{{self.opexns}}}Identifier")
|
|
270
|
+
self.identifier.set("type", key_name)
|
|
271
|
+
self.identifier.text = str(ident)
|
|
215
272
|
except Exception as e:
|
|
216
273
|
print('Error looking up Identifiers')
|
|
217
274
|
print(e)
|
|
218
|
-
|
|
219
|
-
def check_opex(self, opex_path: str):
|
|
220
|
-
opex_path = opex_path + ".opex"
|
|
221
|
-
if os.path.exists(win_256_check(opex_path)):
|
|
222
|
-
return False
|
|
223
|
-
else:
|
|
224
|
-
return True
|
|
225
|
-
|
|
226
|
-
def write_opex(self, path: str, opexxml: ET.Element):
|
|
227
|
-
opex_path = win_256_check(str(path) + ".opex")
|
|
228
|
-
opex = ET.indent(opexxml, " ")
|
|
229
|
-
opex = ET.tostring(opexxml, pretty_print=True, xml_declaration=True, encoding="UTF-8", standalone=True)
|
|
230
|
-
with open(f'{opex_path}', 'w', encoding="UTF-8") as writer:
|
|
231
|
-
writer.write(opex.decode('UTF-8'))
|
|
232
|
-
print('Saved Opex File to: ' + opex_path)
|
|
233
|
-
return opex_path
|
|
234
|
-
|
|
235
|
-
def init_df(self):
|
|
236
|
-
if self.autoclass_flag:
|
|
237
|
-
if self.autoclass_flag in {"catalog", "c", "catalog-generic", "cg"}:
|
|
238
|
-
ac = ClassificationGenerator(self.root, output_path = self.output_path, prefix = self.prefix, start_ref = self.startref, empty_flag = self.empty_flag, accession_flag = False)
|
|
239
|
-
elif self.autoclass_flag in {"accession", "a", "accession-generic", "ag", "both", "b", "both-generic", "bg"}:
|
|
240
|
-
ac = ClassificationGenerator(self.root, output_path = self.output_path, prefix = self.prefix, accprefix = self.acc_prefix, start_ref = self.startref, empty_flag = self.empty_flag, accession_flag="File")
|
|
241
|
-
self.df = ac.init_dataframe()
|
|
242
|
-
if self.autoclass_flag in {"accession", "a", "accesion-generic", "ag"}:
|
|
243
|
-
self.df = self.df.drop('Archive_Reference', axis=1)
|
|
244
|
-
self.column_headers = self.df.columns.values.tolist()
|
|
245
|
-
if self.export_flag:
|
|
246
|
-
output_path = define_output_file(self.output_path, self.root, meta_dir_flag = self.meta_dir_flag, output_format = self.output_format)
|
|
247
|
-
if self.output_format == "xlsx":
|
|
248
|
-
export_xl(self.df, output_path)
|
|
249
|
-
elif self.output_format == "csv":
|
|
250
|
-
export_csv(self.df, output_path)
|
|
251
|
-
elif self.input:
|
|
252
|
-
if self.input.endswith('xlsx'):
|
|
253
|
-
self.df = pd.read_excel(self.input)
|
|
254
|
-
elif self.input.endswith('csv'):
|
|
255
|
-
self.df = pd.read_csv(self.input)
|
|
256
|
-
self.column_headers = self.df.columns.values.tolist()
|
|
257
|
-
self.set_input_flags()
|
|
258
|
-
else:
|
|
259
|
-
self.df = None
|
|
260
|
-
self.column_headers = None
|
|
261
|
-
|
|
262
|
-
def clear_opex(self):
|
|
263
|
-
walk = list(os.walk(self.root))
|
|
264
|
-
for dir, _, files in walk[::-1]:
|
|
265
|
-
for file in files:
|
|
266
|
-
file_path = win_256_check(os.path.join(dir, file))
|
|
267
|
-
if str(file_path).endswith('.opex'):
|
|
268
|
-
os.remove(file_path)
|
|
269
|
-
print(f'Cleared Opex: {file_path}')
|
|
270
|
-
|
|
271
|
-
def set_input_flags(self):
|
|
272
|
-
if 'Title' in self.column_headers:
|
|
273
|
-
self.title_flag = True
|
|
274
|
-
if 'Description' in self.column_headers:
|
|
275
|
-
self.description_flag = True
|
|
276
|
-
if 'Security' in self.column_headers:
|
|
277
|
-
self.security_flag = True
|
|
278
|
-
if 'SourceID' in self.column_headers:
|
|
279
|
-
self.sourceid_flag = True
|
|
280
|
-
if 'Ignore' in self.column_headers:
|
|
281
|
-
self.ignore_flag = True
|
|
282
|
-
if 'Hash' in self.column_headers and 'Algorithm' in self.column_headers:
|
|
283
|
-
self.hash_from_spread = True
|
|
284
|
-
print("Hash detected in Spreadsheet; taking hashes from spreadsheet")
|
|
285
|
-
time.sleep(3)
|
|
286
|
-
|
|
287
|
-
def print_descriptive_xmls(self):
|
|
288
|
-
for file in os.scandir(self.metadata_dir):
|
|
289
|
-
path = os.path.join(self.metadata_dir, file.name)
|
|
290
|
-
print(path)
|
|
291
|
-
xml_file = ET.parse(path)
|
|
292
|
-
root_element = ET.QName(xml_file.find('.'))
|
|
293
|
-
root_element_ln = root_element.localname
|
|
294
|
-
for elem in xml_file.findall(".//"):
|
|
295
|
-
elem_path = xml_file.getelementpath(elem)
|
|
296
|
-
elem = ET.QName(elem)
|
|
297
|
-
elem_lnpath = elem_path.replace(f"{{{elem.namespace}}}", root_element_ln + ":")
|
|
298
|
-
print(elem_lnpath)
|
|
299
275
|
|
|
300
276
|
def init_generate_descriptive_metadata(self):
|
|
301
277
|
self.xml_files = []
|
|
@@ -329,15 +305,19 @@ class OpexManifestGenerator():
|
|
|
329
305
|
if len(list_xml) > 0:
|
|
330
306
|
self.xml_files.append({'data': list_xml, 'localname': root_element_ln, 'xmlfile': path})
|
|
331
307
|
|
|
332
|
-
def generate_descriptive_metadata(self,
|
|
308
|
+
def generate_descriptive_metadata(self, xml_desc_elem: ET.Element, idx: pd.Index):
|
|
309
|
+
"""
|
|
310
|
+
Composes the data into an xml file.
|
|
311
|
+
"""
|
|
333
312
|
for xml_file in self.xml_files:
|
|
334
313
|
list_xml = xml_file.get('data')
|
|
335
314
|
localname = xml_file.get('localname')
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
315
|
+
if len(list_xml) == 0:
|
|
316
|
+
pass
|
|
317
|
+
else:
|
|
318
|
+
if idx.empty:
|
|
319
|
+
pass
|
|
320
|
+
else:
|
|
341
321
|
xml_new = ET.parse(xml_file.get('xmlfile'))
|
|
342
322
|
for elem_dict in list_xml:
|
|
343
323
|
name = elem_dict.get('Name')
|
|
@@ -345,21 +325,28 @@ class OpexManifestGenerator():
|
|
|
345
325
|
ns = elem_dict.get('Namespace')
|
|
346
326
|
try:
|
|
347
327
|
if self.metadata_flag in {'e', 'exact'}:
|
|
348
|
-
val = self.df.loc[idx
|
|
328
|
+
val = check_nan(self.df[path].loc[idx].item())
|
|
349
329
|
elif self.metadata_flag in {'f', 'flat'}:
|
|
350
|
-
val = self.df.loc[idx
|
|
351
|
-
if
|
|
330
|
+
val = check_nan(self.df[name].loc[idx].item())
|
|
331
|
+
if val is None:
|
|
352
332
|
continue
|
|
353
333
|
else:
|
|
354
334
|
if is_datetime64_any_dtype(str(val)):
|
|
355
335
|
val = pd.to_datetime(val)
|
|
356
|
-
val = datetime.strftime(val, "%Y-%m-%dT%H
|
|
336
|
+
val = datetime.datetime.strftime(val, "%Y-%m-%dT%H:%M:%S.000Z")
|
|
337
|
+
if self.metadata_flag in {'e','exact'}:
|
|
338
|
+
n = path.replace(localname + ":", f"{{{ns}}}")
|
|
339
|
+
elem = xml_new.find(f'/{n}')
|
|
340
|
+
elif self.metadata_flag in {'f', 'flat'}:
|
|
341
|
+
n = name.split(':')[-1]
|
|
342
|
+
elem = xml_new.find(f'//{{{ns}}}{n}')
|
|
343
|
+
elem.text = str(val)
|
|
357
344
|
except KeyError as e:
|
|
358
345
|
print('Key Error: please ensure column header\'s are an exact match...')
|
|
359
346
|
print(f'Missing Column: {e}')
|
|
360
347
|
print('Alternatively use flat mode...')
|
|
361
348
|
time.sleep(3)
|
|
362
|
-
raise
|
|
349
|
+
raise SystemExit()
|
|
363
350
|
except IndexError as e:
|
|
364
351
|
print("""Index Error; it is likely you have removed or added a file/folder to the directory \
|
|
365
352
|
after generating the spreadsheet. An opex will still be generated but with no xml metadata. \
|
|
@@ -367,25 +354,11 @@ class OpexManifestGenerator():
|
|
|
367
354
|
print(f'Error: {e}')
|
|
368
355
|
time.sleep(3)
|
|
369
356
|
break
|
|
370
|
-
|
|
371
|
-
continue
|
|
372
|
-
if self.metadata_flag in {'e','exact'}:
|
|
373
|
-
n = path.replace(localname + ":", f"{{{ns}}}")
|
|
374
|
-
elem = xml_new.find(f'/{n}')
|
|
375
|
-
elif self.metadata_flag in {'f', 'flat'}:
|
|
376
|
-
n = name.split(':')[-1]
|
|
377
|
-
elem = xml_new.find(f'//{{{ns}}}{n}')
|
|
378
|
-
elem.text = str(val)
|
|
379
|
-
xml_desc.append(xml_new.find('.'))
|
|
380
|
-
else:
|
|
381
|
-
pass
|
|
382
|
-
else:
|
|
383
|
-
pass
|
|
357
|
+
xml_desc_elem.append(xml_new.find('.'))
|
|
384
358
|
|
|
385
|
-
def generate_opex_properties(self, xmlroot: ET.Element, idx: int,
|
|
386
|
-
|
|
359
|
+
def generate_opex_properties(self, xmlroot: ET.Element, idx: int, title: str = None,
|
|
360
|
+
description: str = None, security: str = None):
|
|
387
361
|
self.properties = ET.SubElement(xmlroot, f"{{{self.opexns}}}Properties")
|
|
388
|
-
self.identifiers = ET.SubElement(self.properties, f"{{{self.opexns}}}Identifiers")
|
|
389
362
|
if title:
|
|
390
363
|
self.titlexml = ET.SubElement(self.properties, f"{{{self.opexns}}}Title")
|
|
391
364
|
self.titlexml.text = str(title)
|
|
@@ -395,12 +368,9 @@ class OpexManifestGenerator():
|
|
|
395
368
|
if security:
|
|
396
369
|
self.securityxml = ET.SubElement(self.properties, f"{{{self.opexns}}}SecurityDescriptor")
|
|
397
370
|
self.securityxml.text = str(security)
|
|
398
|
-
if self.autoclass_flag in {"generic", "g"}:
|
|
399
|
-
self.
|
|
400
|
-
elif self.autoclass_flag not in {"generic", "g"} or self.input:
|
|
371
|
+
if self.autoclass_flag not in {"generic", "g"} or self.input:
|
|
372
|
+
self.identifiers = ET.SubElement(self.properties, f"{{{self.opexns}}}Identifiers")
|
|
401
373
|
self.ident_df_lookup(idx)
|
|
402
|
-
if self.identifiers is None:
|
|
403
|
-
self.properties.remove(self.identifiers)
|
|
404
374
|
if self.properties is None:
|
|
405
375
|
xmlroot.remove(self.properties)
|
|
406
376
|
|
|
@@ -423,7 +393,7 @@ class OpexManifestGenerator():
|
|
|
423
393
|
if self.autoclass_flag or self.algorithm or self.input:
|
|
424
394
|
pass
|
|
425
395
|
else:
|
|
426
|
-
self.
|
|
396
|
+
print_running_time(self.start_time)
|
|
427
397
|
print('Cleared OPEXES. No additional arguments passed, so ending program.'); time.sleep(3)
|
|
428
398
|
raise SystemExit()
|
|
429
399
|
if self.empty_flag:
|
|
@@ -437,7 +407,7 @@ class OpexManifestGenerator():
|
|
|
437
407
|
if self.algorithm:
|
|
438
408
|
output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Fixities", output_format = "txt")
|
|
439
409
|
export_list_txt(self.list_fixity, output_path)
|
|
440
|
-
self.
|
|
410
|
+
print_running_time(self.start_time)
|
|
441
411
|
|
|
442
412
|
class OpexDir(OpexManifestGenerator):
|
|
443
413
|
def __init__(self, OMG: OpexManifestGenerator, folder_path: str, title: str = None, description: str = None, security: str = None):
|
|
@@ -448,19 +418,17 @@ class OpexDir(OpexManifestGenerator):
|
|
|
448
418
|
self.folder_path = folder_path.replace(u'\\\\?\\', "")
|
|
449
419
|
else:
|
|
450
420
|
self.folder_path = folder_path
|
|
451
|
-
if self.OMG.
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
elif self.OMG.autoclass_flag is None or self.OMG.autoclass_flag in {"g", "generic"}:
|
|
456
|
-
self.index = None
|
|
421
|
+
if self.OMG.autoclass_flag not in {None, "g","generic"}:
|
|
422
|
+
index = self.OMG.index_df_lookup(self.folder_path)
|
|
423
|
+
else:
|
|
424
|
+
index = None
|
|
457
425
|
if self.OMG.ignore_flag or self.OMG.remove_flag:
|
|
458
426
|
if self.OMG.ignore_flag:
|
|
459
|
-
self.ignore = self.OMG.ignore_df_lookup(
|
|
427
|
+
self.ignore = self.OMG.ignore_df_lookup(index)
|
|
460
428
|
if self.ignore:
|
|
461
429
|
return
|
|
462
430
|
if self.OMG.remove_flag:
|
|
463
|
-
self.removal = self.OMG.remove_df_lookup(self.folder_path,
|
|
431
|
+
self.removal = self.OMG.remove_df_lookup(self.folder_path, index)
|
|
464
432
|
if self.removal:
|
|
465
433
|
return
|
|
466
434
|
else:
|
|
@@ -474,7 +442,7 @@ class OpexDir(OpexManifestGenerator):
|
|
|
474
442
|
self.files = ET.SubElement(self.manifest, f"{{{self.opexns}}}Files")
|
|
475
443
|
|
|
476
444
|
if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
|
|
477
|
-
self.title, self.description, self.security = self.OMG.
|
|
445
|
+
self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
|
|
478
446
|
elif self.OMG.autoclass_flag in {"generic", "g", "catalog-generic", "cg", "accession-generic", "ag", "both-generic", "bg"}:
|
|
479
447
|
self.title = os.path.basename(self.folder_path)
|
|
480
448
|
self.description = os.path.basename(self.folder_path)
|
|
@@ -484,29 +452,36 @@ class OpexDir(OpexManifestGenerator):
|
|
|
484
452
|
self.description = description
|
|
485
453
|
self.security = security
|
|
486
454
|
if self.OMG.sourceid_flag:
|
|
487
|
-
self.OMG.sourceid_df_lookup(self.transfer, self.folder_path,
|
|
455
|
+
self.OMG.sourceid_df_lookup(self.transfer, self.folder_path, index)
|
|
488
456
|
if self.OMG.autoclass_flag or self.OMG.input:
|
|
489
|
-
self.OMG.generate_opex_properties(self.xmlroot,
|
|
457
|
+
self.OMG.generate_opex_properties(self.xmlroot, index,
|
|
490
458
|
title = self.title,
|
|
491
459
|
description = self.description,
|
|
492
460
|
security = self.security)
|
|
493
461
|
if not self.OMG.metadata_flag in {'none', 'n'}:
|
|
494
462
|
self.xml_descmeta = ET.SubElement(self.xmlroot,f"{{{self.opexns}}}DescriptiveMetadata")
|
|
495
|
-
self.OMG.generate_descriptive_metadata(self.xmlroot, idx =
|
|
463
|
+
self.OMG.generate_descriptive_metadata(self.xmlroot, idx = index)
|
|
496
464
|
|
|
497
|
-
def filter_directories(self, directory: str):
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
465
|
+
def filter_directories(self, directory: str, sort_key: str = str.casefold):
|
|
466
|
+
try:
|
|
467
|
+
if self.OMG.hidden_flag is False:
|
|
468
|
+
list_directories = sorted([win_256_check(os.path.join(directory, f.name)) for f in os.scandir(directory)
|
|
469
|
+
if not f.name.startswith('.')
|
|
470
|
+
and filter_win_hidden(win_256_check(os.path.join(directory, f.name))) is False
|
|
471
|
+
and f.name != 'meta'
|
|
472
|
+
and f.name != os.path.basename(__file__)],
|
|
473
|
+
key=sort_key)
|
|
474
|
+
elif self.OMG.hidden_flag is True:
|
|
475
|
+
list_directories = sorted([os.path.join(directory, f.name) for f in os.scandir(directory) \
|
|
476
|
+
if f.name != 'meta'
|
|
477
|
+
and f.name != os.path.basename(__file__)],
|
|
478
|
+
key=sort_key)
|
|
479
|
+
return list_directories
|
|
480
|
+
except Exception as e:
|
|
481
|
+
print('Failed to Filter')
|
|
482
|
+
print(e)
|
|
483
|
+
raise SystemError()
|
|
484
|
+
|
|
510
485
|
|
|
511
486
|
def generate_opex_dirs(self, path: str):
|
|
512
487
|
self = OpexDir(self.OMG, path)
|
|
@@ -521,7 +496,7 @@ class OpexDir(OpexManifestGenerator):
|
|
|
521
496
|
self.generate_opex_dirs(f_path)
|
|
522
497
|
else:
|
|
523
498
|
OpexFile(self.OMG, f_path, self.OMG.algorithm)
|
|
524
|
-
if
|
|
499
|
+
if check_opex(opex_path):
|
|
525
500
|
if not self.ignore:
|
|
526
501
|
for f_path in self.filter_directories(path):
|
|
527
502
|
if os.path.isfile(f_path):
|
|
@@ -532,7 +507,7 @@ class OpexDir(OpexManifestGenerator):
|
|
|
532
507
|
file.set("type", "content")
|
|
533
508
|
file.set("size", str(os.path.getsize(f_path)))
|
|
534
509
|
file.text = str(os.path.basename(f_path))
|
|
535
|
-
|
|
510
|
+
write_opex(opex_path, self.xmlroot)
|
|
536
511
|
else:
|
|
537
512
|
print(f"Avoiding override, Opex exists at: {opex_path}")
|
|
538
513
|
|
|
@@ -544,26 +519,32 @@ class OpexFile(OpexManifestGenerator):
|
|
|
544
519
|
self.file_path = file_path.replace(u'\\\\?\\', "")
|
|
545
520
|
else:
|
|
546
521
|
self.file_path = file_path
|
|
547
|
-
if
|
|
548
|
-
if self.OMG.input
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
self.
|
|
522
|
+
if check_opex(self.file_path):
|
|
523
|
+
if any([self.OMG.input,
|
|
524
|
+
self.OMG.autoclass_flag in {"c","catalog","a","accession","b","both","cg","catalog-generic","ag","accession-generic","bg","both-generic"},
|
|
525
|
+
self.OMG.ignore_flag,
|
|
526
|
+
self.OMG.remove_flag,
|
|
527
|
+
self.OMG.sourceid_flag,
|
|
528
|
+
self.OMG.title_flag,
|
|
529
|
+
self.OMG.description_flag,
|
|
530
|
+
self.OMG.security_flag]):
|
|
531
|
+
index = self.OMG.index_df_lookup(self.file_path)
|
|
552
532
|
elif self.OMG.autoclass_flag is None or self.OMG.autoclass_flag in {"g","generic"}:
|
|
553
|
-
|
|
533
|
+
index = None
|
|
554
534
|
if self.OMG.ignore_flag:
|
|
555
|
-
self.ignore = self.OMG.ignore_df_lookup(
|
|
535
|
+
self.ignore = self.OMG.ignore_df_lookup(index)
|
|
556
536
|
if self.ignore:
|
|
537
|
+
#WTF is this?
|
|
557
538
|
return
|
|
558
539
|
if self.OMG.remove_flag:
|
|
559
|
-
removal = self.OMG.remove_df_lookup(self.file_path,
|
|
540
|
+
removal = self.OMG.remove_df_lookup(self.file_path, index)
|
|
560
541
|
if removal:
|
|
561
542
|
return
|
|
562
543
|
else:
|
|
563
544
|
self.ignore = False
|
|
564
545
|
self.algorithm = algorithm
|
|
565
546
|
if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
|
|
566
|
-
self.title, self.description, self.security = self.OMG.
|
|
547
|
+
self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
|
|
567
548
|
elif self.OMG.autoclass_flag in {"generic", "g", "catalog-generic", "cg", "accession-generic", "ag", "both-generic", "bg"}:
|
|
568
549
|
self.title = os.path.splitext(os.path.basename(self.file_path))[0]
|
|
569
550
|
self.description = os.path.splitext(os.path.basename(self.file_path))[0]
|
|
@@ -580,20 +561,20 @@ class OpexFile(OpexManifestGenerator):
|
|
|
580
561
|
if self.OMG.algorithm:
|
|
581
562
|
self.fixities = ET.SubElement(self.transfer, f"{{{self.opexns}}}Fixities")
|
|
582
563
|
if self.OMG.hash_from_spread:
|
|
583
|
-
self.OMG.hash_df_lookup(self.
|
|
564
|
+
self.OMG.hash_df_lookup(self.fixities, index)
|
|
584
565
|
else:
|
|
585
566
|
self.genererate_opex_fixity(self.file_path)
|
|
586
567
|
if self.transfer is None:
|
|
587
568
|
self.xmlroot.remove(self.transfer)
|
|
588
569
|
if self.OMG.autoclass_flag or self.OMG.input:
|
|
589
|
-
self.OMG.generate_opex_properties(self.xmlroot,
|
|
570
|
+
self.OMG.generate_opex_properties(self.xmlroot, index,
|
|
590
571
|
title = self.title,
|
|
591
572
|
description = self.description,
|
|
592
573
|
security = self.security)
|
|
593
574
|
if not self.OMG.metadata_flag in {'none','n'}:
|
|
594
575
|
self.xml_descmeta = ET.SubElement(self.xmlroot, f"{{{self.opexns}}}DescriptiveMetadata")
|
|
595
|
-
self.OMG.generate_descriptive_metadata(self.xml_descmeta,
|
|
596
|
-
opex_path =
|
|
576
|
+
self.OMG.generate_descriptive_metadata(self.xml_descmeta, index)
|
|
577
|
+
opex_path = write_opex(self.file_path, self.xmlroot)
|
|
597
578
|
if self.OMG.zip_flag:
|
|
598
579
|
zip_opex(self.file_path, opex_path)
|
|
599
580
|
else:
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
[options]
|
|
2
|
+
|
|
3
|
+
INDEX_FIELD = "FullName"
|
|
4
|
+
TITLE_FIELD = "Title"
|
|
5
|
+
DESCRIPTION_FIELD = "Description"
|
|
6
|
+
SECUIRTY_FIELD = "Security"
|
|
7
|
+
IDENTIFIER_FIELD = "Identifier"
|
|
8
|
+
IDENTIFIER_DEFAULT = "code"
|
|
9
|
+
REMOVAL_FIELD = "Removals"
|
|
10
|
+
IGNORE_FIELD = "Ignore"
|
|
11
|
+
SOURCEID_FIELD = "SourceID"
|
|
12
|
+
HASH_FIELD = "Hash"
|
|
13
|
+
ALGORITHM_FIELD = "Algorithm"
|
{opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: opex_manifest_generator
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.11
|
|
4
4
|
Summary: Opex Manifest Generator Tool for use with Opex / Preservica
|
|
5
5
|
Author-email: Christopher Prince <c.pj.prince@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/CPJPRINCE/opex_manifest_generator
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
opex_manifest_generator/__init__.py,sha256=fsN-dLjNMn-AfjeG2jkryzIa69bY09B5xLTqWedOv_w,461
|
|
2
|
-
opex_manifest_generator/cli.py,sha256=
|
|
3
|
-
opex_manifest_generator/common.py,sha256=
|
|
2
|
+
opex_manifest_generator/cli.py,sha256=6-ZbFZQT82aUYA5aNYLV4xteEw-pMgXsQr_yOCDklYM,10121
|
|
3
|
+
opex_manifest_generator/common.py,sha256=dJGRQicA7B9n5WXpG5pJlV0fzO0itF6GyDXWTJ-5mxs,1782
|
|
4
4
|
opex_manifest_generator/hash.py,sha256=mpLP-BqqDC7BuQQ5TEc4jaIBGabW-qVr128JaZq54mY,992
|
|
5
|
-
opex_manifest_generator/opex_manifest.py,sha256=
|
|
6
|
-
opex_manifest_generator/
|
|
5
|
+
opex_manifest_generator/opex_manifest.py,sha256=NkvgVR2C5QIJPq3kIcjjAURh3uaO4P2CGpEiIP76N6Q,29209
|
|
6
|
+
opex_manifest_generator/options.properties,sha256=wLa_PjlOjzH1t88edz18wgTFC6F0it8ImPK9xUrQo9g,308
|
|
7
7
|
opex_manifest_generator/metadata/DublinCore Template.xml,sha256=csNGXzSH27Whs4BQNuwMZl8nLSdDq7Y_OblTfzeBqWQ,775
|
|
8
8
|
opex_manifest_generator/metadata/EAD Template.xml,sha256=OsWjUfKiLAsc1zqm56auyFSDYMuZpu6a49AFgqZNzlU,2167
|
|
9
9
|
opex_manifest_generator/metadata/GDPR Template.xml,sha256=r6WTATRVt1sr7VCdaXOwz4vLgU3IgFFx7oRmPnORBWc,475
|
|
@@ -14,9 +14,9 @@ opex_manifest_generator/samples/spreads/dctemplate.xlsx,sha256=R33CNcBK3s-KFYEhm
|
|
|
14
14
|
opex_manifest_generator/samples/spreads/eadtemplate.xlsx,sha256=4lrP0LLZXwv73fl3fvqr7yqpdDJuRj2D4ZADQ4OS6Ps,19299
|
|
15
15
|
opex_manifest_generator/samples/spreads/gdprtemplate.xlsx,sha256=3k6FpN6n83yF5wYd64Yy8Rxv2b1Z497icIB8UBGDW-M,18662
|
|
16
16
|
opex_manifest_generator/samples/spreads/modstemplate.xlsx,sha256=5kNpp4Cju_POvnTrgFk34OJVe5yc6o3R4ZNX2TT8zAc,19509
|
|
17
|
-
opex_manifest_generator-1.1.
|
|
18
|
-
opex_manifest_generator-1.1.
|
|
19
|
-
opex_manifest_generator-1.1.
|
|
20
|
-
opex_manifest_generator-1.1.
|
|
21
|
-
opex_manifest_generator-1.1.
|
|
22
|
-
opex_manifest_generator-1.1.
|
|
17
|
+
opex_manifest_generator-1.1.11.dist-info/LICENSE.md,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
18
|
+
opex_manifest_generator-1.1.11.dist-info/METADATA,sha256=qYpxKL4BOjXcYip-W6Z_jDA-FXs22O65G9IYTcU6J9I,687
|
|
19
|
+
opex_manifest_generator-1.1.11.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
|
20
|
+
opex_manifest_generator-1.1.11.dist-info/entry_points.txt,sha256=WGMc3hWlqOsQ8DcTuy8-DyBbJKkWNImT4J1FasVDHts,70
|
|
21
|
+
opex_manifest_generator-1.1.11.dist-info/top_level.txt,sha256=K48eGnaDLVO6YDJdAZLqbeoZvJHBGX25cvYT-i8gWt0,24
|
|
22
|
+
opex_manifest_generator-1.1.11.dist-info/RECORD,,
|
{opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|
{opex_manifest_generator-1.1.9.dist-info → opex_manifest_generator-1.1.11.dist-info}/top_level.txt
RENAMED
|
File without changes
|