opex-manifest-generator 1.1.13__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opex_manifest_generator/__init__.py +15 -15
- opex_manifest_generator/cli.py +182 -160
- opex_manifest_generator/common.py +64 -58
- opex_manifest_generator/hash.py +33 -33
- opex_manifest_generator/metadata/EAD Template.xml +101 -101
- opex_manifest_generator/metadata/GDPR Template.xml +8 -8
- opex_manifest_generator/metadata/MODS Template.xml +66 -66
- opex_manifest_generator/opex_manifest.py +89 -49
- opex_manifest_generator/options.properties +12 -12
- {opex_manifest_generator-1.1.13.dist-info → opex_manifest_generator-1.2.1.dist-info}/LICENSE.md +201 -201
- {opex_manifest_generator-1.1.13.dist-info → opex_manifest_generator-1.2.1.dist-info}/METADATA +16 -16
- opex_manifest_generator-1.2.1.dist-info/RECORD +22 -0
- opex_manifest_generator-1.1.13.dist-info/RECORD +0 -22
- {opex_manifest_generator-1.1.13.dist-info → opex_manifest_generator-1.2.1.dist-info}/WHEEL +0 -0
- {opex_manifest_generator-1.1.13.dist-info → opex_manifest_generator-1.2.1.dist-info}/entry_points.txt +0 -0
- {opex_manifest_generator-1.1.13.dist-info → opex_manifest_generator-1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -1,102 +1,102 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
-
|
|
3
|
-
<ead:ead xmlns:ead="urn:isbn:1-931666-22-9" xmlns="urn:isbn:1-931666-22-9"
|
|
4
|
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd">
|
|
5
|
-
|
|
6
|
-
<eadheader>
|
|
7
|
-
<eadid></eadid>
|
|
8
|
-
<filedesc>
|
|
9
|
-
<titlestmt>
|
|
10
|
-
<titleproper></titleproper>
|
|
11
|
-
</titlestmt>
|
|
12
|
-
</filedesc>
|
|
13
|
-
<profiledesc>
|
|
14
|
-
<creation>
|
|
15
|
-
<date></date>
|
|
16
|
-
</creation>
|
|
17
|
-
<descrules></descrules>
|
|
18
|
-
</profiledesc>
|
|
19
|
-
</eadheader>
|
|
20
|
-
|
|
21
|
-
<archdesc level="series">
|
|
22
|
-
<did>
|
|
23
|
-
<unittitle></unittitle>
|
|
24
|
-
<unitdate label="Accumulation"></unitdate>
|
|
25
|
-
<unitdate label="Created"></unitdate>
|
|
26
|
-
<origination label="Creator"></origination>
|
|
27
|
-
<physdesc></physdesc>
|
|
28
|
-
<langmaterial></langmaterial>
|
|
29
|
-
</did>
|
|
30
|
-
|
|
31
|
-
<bioghist>
|
|
32
|
-
<head>Biography or History</head>
|
|
33
|
-
<p></p>
|
|
34
|
-
</bioghist>
|
|
35
|
-
<custodhist>
|
|
36
|
-
<head>Custodial history</head>
|
|
37
|
-
<p></p>
|
|
38
|
-
</custodhist>
|
|
39
|
-
<acqinfo>
|
|
40
|
-
<head>Acquisition Information</head>
|
|
41
|
-
<p></p>
|
|
42
|
-
</acqinfo>
|
|
43
|
-
<scopecontent>
|
|
44
|
-
<head>Scope and content</head>
|
|
45
|
-
<p></p>
|
|
46
|
-
</scopecontent>
|
|
47
|
-
<appraisal>
|
|
48
|
-
<head>Appraisal Information</head>
|
|
49
|
-
<p></p>
|
|
50
|
-
</appraisal>
|
|
51
|
-
<accruals>
|
|
52
|
-
<head>Accruals</head>
|
|
53
|
-
<p></p>
|
|
54
|
-
</accruals>
|
|
55
|
-
<arrangement>
|
|
56
|
-
<head>Arrangement</head>
|
|
57
|
-
<p></p>
|
|
58
|
-
</arrangement>
|
|
59
|
-
<accessrestrict>
|
|
60
|
-
<head>Conditions governing access</head>
|
|
61
|
-
<p></p>
|
|
62
|
-
</accessrestrict>
|
|
63
|
-
<userestrict>
|
|
64
|
-
<head>Conditions Governing Use</head>
|
|
65
|
-
<p></p>
|
|
66
|
-
</userestrict>
|
|
67
|
-
<phystech>
|
|
68
|
-
<head>Physical Characteristics and Technical Requirements</head>
|
|
69
|
-
<p></p>
|
|
70
|
-
</phystech>
|
|
71
|
-
<otherfindaid>
|
|
72
|
-
<head>Other Finding Aid</head>
|
|
73
|
-
<p></p>
|
|
74
|
-
</otherfindaid>
|
|
75
|
-
<originalsloc>
|
|
76
|
-
<head>Location of Originals</head>
|
|
77
|
-
<p></p>
|
|
78
|
-
</originalsloc>
|
|
79
|
-
<altformavail>
|
|
80
|
-
<head>Alternative Form Available</head>
|
|
81
|
-
<p></p>
|
|
82
|
-
</altformavail>
|
|
83
|
-
<relatedmaterial>
|
|
84
|
-
<head>Related Material</head>
|
|
85
|
-
<p></p>
|
|
86
|
-
</relatedmaterial>
|
|
87
|
-
<bibliography>
|
|
88
|
-
<head>Bibliography</head>
|
|
89
|
-
<p></p>
|
|
90
|
-
</bibliography>
|
|
91
|
-
<odd>
|
|
92
|
-
<head>Other Descriptive Data</head>
|
|
93
|
-
<note>
|
|
94
|
-
<p></p>
|
|
95
|
-
</note>
|
|
96
|
-
</odd>
|
|
97
|
-
<processinfo>
|
|
98
|
-
<head>Processing Information</head>
|
|
99
|
-
<p></p>
|
|
100
|
-
</processinfo>
|
|
101
|
-
</archdesc>
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
+
|
|
3
|
+
<ead:ead xmlns:ead="urn:isbn:1-931666-22-9" xmlns="urn:isbn:1-931666-22-9"
|
|
4
|
+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd">
|
|
5
|
+
|
|
6
|
+
<eadheader>
|
|
7
|
+
<eadid></eadid>
|
|
8
|
+
<filedesc>
|
|
9
|
+
<titlestmt>
|
|
10
|
+
<titleproper></titleproper>
|
|
11
|
+
</titlestmt>
|
|
12
|
+
</filedesc>
|
|
13
|
+
<profiledesc>
|
|
14
|
+
<creation>
|
|
15
|
+
<date></date>
|
|
16
|
+
</creation>
|
|
17
|
+
<descrules></descrules>
|
|
18
|
+
</profiledesc>
|
|
19
|
+
</eadheader>
|
|
20
|
+
|
|
21
|
+
<archdesc level="series">
|
|
22
|
+
<did>
|
|
23
|
+
<unittitle></unittitle>
|
|
24
|
+
<unitdate label="Accumulation"></unitdate>
|
|
25
|
+
<unitdate label="Created"></unitdate>
|
|
26
|
+
<origination label="Creator"></origination>
|
|
27
|
+
<physdesc></physdesc>
|
|
28
|
+
<langmaterial></langmaterial>
|
|
29
|
+
</did>
|
|
30
|
+
|
|
31
|
+
<bioghist>
|
|
32
|
+
<head>Biography or History</head>
|
|
33
|
+
<p></p>
|
|
34
|
+
</bioghist>
|
|
35
|
+
<custodhist>
|
|
36
|
+
<head>Custodial history</head>
|
|
37
|
+
<p></p>
|
|
38
|
+
</custodhist>
|
|
39
|
+
<acqinfo>
|
|
40
|
+
<head>Acquisition Information</head>
|
|
41
|
+
<p></p>
|
|
42
|
+
</acqinfo>
|
|
43
|
+
<scopecontent>
|
|
44
|
+
<head>Scope and content</head>
|
|
45
|
+
<p></p>
|
|
46
|
+
</scopecontent>
|
|
47
|
+
<appraisal>
|
|
48
|
+
<head>Appraisal Information</head>
|
|
49
|
+
<p></p>
|
|
50
|
+
</appraisal>
|
|
51
|
+
<accruals>
|
|
52
|
+
<head>Accruals</head>
|
|
53
|
+
<p></p>
|
|
54
|
+
</accruals>
|
|
55
|
+
<arrangement>
|
|
56
|
+
<head>Arrangement</head>
|
|
57
|
+
<p></p>
|
|
58
|
+
</arrangement>
|
|
59
|
+
<accessrestrict>
|
|
60
|
+
<head>Conditions governing access</head>
|
|
61
|
+
<p></p>
|
|
62
|
+
</accessrestrict>
|
|
63
|
+
<userestrict>
|
|
64
|
+
<head>Conditions Governing Use</head>
|
|
65
|
+
<p></p>
|
|
66
|
+
</userestrict>
|
|
67
|
+
<phystech>
|
|
68
|
+
<head>Physical Characteristics and Technical Requirements</head>
|
|
69
|
+
<p></p>
|
|
70
|
+
</phystech>
|
|
71
|
+
<otherfindaid>
|
|
72
|
+
<head>Other Finding Aid</head>
|
|
73
|
+
<p></p>
|
|
74
|
+
</otherfindaid>
|
|
75
|
+
<originalsloc>
|
|
76
|
+
<head>Location of Originals</head>
|
|
77
|
+
<p></p>
|
|
78
|
+
</originalsloc>
|
|
79
|
+
<altformavail>
|
|
80
|
+
<head>Alternative Form Available</head>
|
|
81
|
+
<p></p>
|
|
82
|
+
</altformavail>
|
|
83
|
+
<relatedmaterial>
|
|
84
|
+
<head>Related Material</head>
|
|
85
|
+
<p></p>
|
|
86
|
+
</relatedmaterial>
|
|
87
|
+
<bibliography>
|
|
88
|
+
<head>Bibliography</head>
|
|
89
|
+
<p></p>
|
|
90
|
+
</bibliography>
|
|
91
|
+
<odd>
|
|
92
|
+
<head>Other Descriptive Data</head>
|
|
93
|
+
<note>
|
|
94
|
+
<p></p>
|
|
95
|
+
</note>
|
|
96
|
+
</odd>
|
|
97
|
+
<processinfo>
|
|
98
|
+
<head>Processing Information</head>
|
|
99
|
+
<p></p>
|
|
100
|
+
</processinfo>
|
|
101
|
+
</archdesc>
|
|
102
102
|
</ead:ead>
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<gdpr xmlns="http://www.preservica.com/gdpr/v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
|
3
|
-
<!-- The dates in this fragment should be ISO-8601 timestamps e.g. 2030-01-01T09:00:00Z -->
|
|
4
|
-
<personaldata></personaldata>
|
|
5
|
-
<recordobjected></recordobjected>
|
|
6
|
-
<recordobjecteddate></recordobjecteddate>
|
|
7
|
-
<recipientdisclosurecategory></recipientdisclosurecategory>
|
|
8
|
-
<timelimitforerasure></timelimitforerasure>
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<gdpr xmlns="http://www.preservica.com/gdpr/v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
|
3
|
+
<!-- The dates in this fragment should be ISO-8601 timestamps e.g. 2030-01-01T09:00:00Z -->
|
|
4
|
+
<personaldata></personaldata>
|
|
5
|
+
<recordobjected></recordobjected>
|
|
6
|
+
<recordobjecteddate></recordobjecteddate>
|
|
7
|
+
<recipientdisclosurecategory></recipientdisclosurecategory>
|
|
8
|
+
<timelimitforerasure></timelimitforerasure>
|
|
9
9
|
</gdpr>
|
|
@@ -1,67 +1,67 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd" version="3.4">
|
|
3
|
-
<recordInfo>
|
|
4
|
-
<recordIdentifier></recordIdentifier>
|
|
5
|
-
<recordOrigin></recordOrigin>
|
|
6
|
-
<recordCreationDate></recordCreationDate>
|
|
7
|
-
<descriptionStandard></descriptionStandard>
|
|
8
|
-
</recordInfo>
|
|
9
|
-
<titleInfo>
|
|
10
|
-
<title></title>
|
|
11
|
-
</titleInfo>
|
|
12
|
-
<name>
|
|
13
|
-
<namePart></namePart>
|
|
14
|
-
<role>
|
|
15
|
-
<roleTerm type="text"></roleTerm>
|
|
16
|
-
</role>
|
|
17
|
-
</name>
|
|
18
|
-
<typeOfResource></typeOfResource>
|
|
19
|
-
<originInfo>
|
|
20
|
-
<dateCreated></dateCreated>
|
|
21
|
-
<dateOther></dateOther>
|
|
22
|
-
</originInfo>
|
|
23
|
-
<language>
|
|
24
|
-
<languageTerm></languageTerm>
|
|
25
|
-
</language>
|
|
26
|
-
<physicalDescription>
|
|
27
|
-
<extent></extent>
|
|
28
|
-
<form></form>
|
|
29
|
-
<note type="arrangement" displayLabel="System of arrangement"></note>
|
|
30
|
-
</physicalDescription>
|
|
31
|
-
<note type="biographyHistory" displayLabel="Biography or History"></note>
|
|
32
|
-
<note type="history" displayLabel="Custodial history"></note>
|
|
33
|
-
<note type="acquisition" displayLabel="Acquisition Information"></note>
|
|
34
|
-
<note type="appraisal" displayLabel="Appraisal Information"></note>
|
|
35
|
-
<note type="accrual" displayLabel="Accrual Information"></note>
|
|
36
|
-
<accessCondition type="access" displayLabel="Conditions governing access"></accessCondition>
|
|
37
|
-
<accessCondition type="use" displayLabel="Conditions governing use"></accessCondition>
|
|
38
|
-
<subject>
|
|
39
|
-
<topic></topic>
|
|
40
|
-
</subject>
|
|
41
|
-
<relatedItem type="host" displayLabel="Other finding aids">
|
|
42
|
-
<titleInfo>
|
|
43
|
-
<title></title>
|
|
44
|
-
</titleInfo>
|
|
45
|
-
</relatedItem>
|
|
46
|
-
<location>
|
|
47
|
-
<physicalLocation type="original" displayLabel="Location of Originals"></physicalLocation>
|
|
48
|
-
</location>
|
|
49
|
-
<relatedItem type="otherVersion" displayLabel="Alternative Form Available">
|
|
50
|
-
<titleInfo>
|
|
51
|
-
<title></title>
|
|
52
|
-
</titleInfo>
|
|
53
|
-
</relatedItem>
|
|
54
|
-
<relatedItem type="references" displayLabel="Material referenced">
|
|
55
|
-
<titleInfo>
|
|
56
|
-
<title></title>
|
|
57
|
-
</titleInfo>
|
|
58
|
-
</relatedItem>
|
|
59
|
-
<relatedItem type="isReferencedBy" displayLabel="Referencing material">
|
|
60
|
-
<titleInfo>
|
|
61
|
-
<title></title>
|
|
62
|
-
</titleInfo>
|
|
63
|
-
</relatedItem>
|
|
64
|
-
<note type="bibliography" displayLabel="Bibliography"></note>
|
|
65
|
-
<note type="other" displayLabel="Other descriptive data"></note>
|
|
66
|
-
<note type="processinfo" displayLabel="Processing Information"></note>
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd" version="3.4">
|
|
3
|
+
<recordInfo>
|
|
4
|
+
<recordIdentifier></recordIdentifier>
|
|
5
|
+
<recordOrigin></recordOrigin>
|
|
6
|
+
<recordCreationDate></recordCreationDate>
|
|
7
|
+
<descriptionStandard></descriptionStandard>
|
|
8
|
+
</recordInfo>
|
|
9
|
+
<titleInfo>
|
|
10
|
+
<title></title>
|
|
11
|
+
</titleInfo>
|
|
12
|
+
<name>
|
|
13
|
+
<namePart></namePart>
|
|
14
|
+
<role>
|
|
15
|
+
<roleTerm type="text"></roleTerm>
|
|
16
|
+
</role>
|
|
17
|
+
</name>
|
|
18
|
+
<typeOfResource></typeOfResource>
|
|
19
|
+
<originInfo>
|
|
20
|
+
<dateCreated></dateCreated>
|
|
21
|
+
<dateOther></dateOther>
|
|
22
|
+
</originInfo>
|
|
23
|
+
<language>
|
|
24
|
+
<languageTerm></languageTerm>
|
|
25
|
+
</language>
|
|
26
|
+
<physicalDescription>
|
|
27
|
+
<extent></extent>
|
|
28
|
+
<form></form>
|
|
29
|
+
<note type="arrangement" displayLabel="System of arrangement"></note>
|
|
30
|
+
</physicalDescription>
|
|
31
|
+
<note type="biographyHistory" displayLabel="Biography or History"></note>
|
|
32
|
+
<note type="history" displayLabel="Custodial history"></note>
|
|
33
|
+
<note type="acquisition" displayLabel="Acquisition Information"></note>
|
|
34
|
+
<note type="appraisal" displayLabel="Appraisal Information"></note>
|
|
35
|
+
<note type="accrual" displayLabel="Accrual Information"></note>
|
|
36
|
+
<accessCondition type="access" displayLabel="Conditions governing access"></accessCondition>
|
|
37
|
+
<accessCondition type="use" displayLabel="Conditions governing use"></accessCondition>
|
|
38
|
+
<subject>
|
|
39
|
+
<topic></topic>
|
|
40
|
+
</subject>
|
|
41
|
+
<relatedItem type="host" displayLabel="Other finding aids">
|
|
42
|
+
<titleInfo>
|
|
43
|
+
<title></title>
|
|
44
|
+
</titleInfo>
|
|
45
|
+
</relatedItem>
|
|
46
|
+
<location>
|
|
47
|
+
<physicalLocation type="original" displayLabel="Location of Originals"></physicalLocation>
|
|
48
|
+
</location>
|
|
49
|
+
<relatedItem type="otherVersion" displayLabel="Alternative Form Available">
|
|
50
|
+
<titleInfo>
|
|
51
|
+
<title></title>
|
|
52
|
+
</titleInfo>
|
|
53
|
+
</relatedItem>
|
|
54
|
+
<relatedItem type="references" displayLabel="Material referenced">
|
|
55
|
+
<titleInfo>
|
|
56
|
+
<title></title>
|
|
57
|
+
</titleInfo>
|
|
58
|
+
</relatedItem>
|
|
59
|
+
<relatedItem type="isReferencedBy" displayLabel="Referencing material">
|
|
60
|
+
<titleInfo>
|
|
61
|
+
<title></title>
|
|
62
|
+
</titleInfo>
|
|
63
|
+
</relatedItem>
|
|
64
|
+
<note type="bibliography" displayLabel="Bibliography"></note>
|
|
65
|
+
<note type="other" displayLabel="Other descriptive data"></note>
|
|
66
|
+
<note type="processinfo" displayLabel="Processing Information"></note>
|
|
67
67
|
</mods>
|
|
@@ -19,15 +19,19 @@ from opex_manifest_generator.common import *
|
|
|
19
19
|
import configparser
|
|
20
20
|
|
|
21
21
|
class OpexManifestGenerator():
|
|
22
|
+
"""
|
|
23
|
+
A Tool for Generating Opexes
|
|
24
|
+
"""
|
|
22
25
|
def __init__(self,
|
|
23
26
|
root: str,
|
|
24
|
-
output_path:
|
|
27
|
+
output_path: str = os.getcwd(),
|
|
25
28
|
meta_dir_flag: bool = True,
|
|
26
|
-
metadata_dir:
|
|
29
|
+
metadata_dir: str = os.path.join(os.path.dirname(os.path.realpath(__file__)), "metadata"),
|
|
27
30
|
metadata_flag: str = 'none',
|
|
28
31
|
autoclass_flag: str = None,
|
|
29
32
|
prefix: str = None,
|
|
30
33
|
acc_prefix: str = None,
|
|
34
|
+
accession_mode: str = False,
|
|
31
35
|
startref: int = 1,
|
|
32
36
|
algorithm: str = None,
|
|
33
37
|
empty_flag: bool = False,
|
|
@@ -39,7 +43,13 @@ class OpexManifestGenerator():
|
|
|
39
43
|
hidden_flag: bool = False,
|
|
40
44
|
output_format: str = "xlsx",
|
|
41
45
|
print_xmls_flag: bool = False,
|
|
42
|
-
options_file: str = os.path.join(os.path.dirname(__file__),'options.properties')
|
|
46
|
+
options_file: str = os.path.join(os.path.dirname(__file__),'options.properties'),
|
|
47
|
+
keywords: list = None,
|
|
48
|
+
keywords_mode: str = "intitalise",
|
|
49
|
+
keywords_retain_order: bool = False,
|
|
50
|
+
sort_key = lambda x: (os.path.isfile(x), str.casefold(x)),
|
|
51
|
+
keywords_abbreviation_number: int = 3,
|
|
52
|
+
delimiter = "/"):
|
|
43
53
|
|
|
44
54
|
self.root = os.path.abspath(root)
|
|
45
55
|
self.opexns = "http://www.openpreservationexchange.org/opex/v1.2"
|
|
@@ -49,6 +59,8 @@ class OpexManifestGenerator():
|
|
|
49
59
|
self.algorithm = algorithm
|
|
50
60
|
self.empty_flag = empty_flag
|
|
51
61
|
self.remove_flag = remove_flag
|
|
62
|
+
if self.remove_flag:
|
|
63
|
+
self.remove_list = []
|
|
52
64
|
self.export_flag = export_flag
|
|
53
65
|
self.startref = startref
|
|
54
66
|
self.autoclass_flag = autoclass_flag
|
|
@@ -57,6 +69,7 @@ class OpexManifestGenerator():
|
|
|
57
69
|
self.meta_dir_flag = meta_dir_flag
|
|
58
70
|
self.prefix = prefix
|
|
59
71
|
self.acc_prefix = acc_prefix
|
|
72
|
+
self.accession_mode = accession_mode
|
|
60
73
|
self.input = input
|
|
61
74
|
self.hidden_flag = hidden_flag
|
|
62
75
|
self.zip_flag = zip_flag
|
|
@@ -65,13 +78,19 @@ class OpexManifestGenerator():
|
|
|
65
78
|
self.metadata_dir = metadata_dir
|
|
66
79
|
self.print_xmls_flag = print_xmls_flag
|
|
67
80
|
self.parse_config(options_file=os.path.abspath(options_file))
|
|
81
|
+
self.keywords_list = keywords
|
|
82
|
+
self.keywords_mode = keywords_mode
|
|
83
|
+
self.keywords_retain_order = keywords_retain_order
|
|
84
|
+
self.sort_key = sort_key
|
|
85
|
+
self.keywords_abbreviation_number = keywords_abbreviation_number
|
|
86
|
+
self.delimiter = delimiter
|
|
68
87
|
|
|
69
88
|
self.title_flag = False
|
|
70
89
|
self.description_flag = False
|
|
71
90
|
self.security_flag = False
|
|
72
91
|
self.ignore_flag = False
|
|
73
92
|
self.sourceid_flag = False
|
|
74
|
-
self.hash_from_spread = False
|
|
93
|
+
self.hash_from_spread = False
|
|
75
94
|
|
|
76
95
|
def parse_config(self, options_file: str = 'options.properties'):
|
|
77
96
|
config = configparser.ConfigParser()
|
|
@@ -130,10 +149,19 @@ class OpexManifestGenerator():
|
|
|
130
149
|
|
|
131
150
|
def init_df(self):
|
|
132
151
|
if self.autoclass_flag:
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
152
|
+
ac = ClassificationGenerator(self.root,
|
|
153
|
+
output_path = self.output_path,
|
|
154
|
+
prefix = self.prefix,
|
|
155
|
+
accprefix = self.acc_prefix,
|
|
156
|
+
start_ref = self.startref,
|
|
157
|
+
empty_flag = self.empty_flag,
|
|
158
|
+
accession_flag=self.accession_mode,
|
|
159
|
+
keywords = self.keywords_list,
|
|
160
|
+
keywords_mode = self.keywords_mode,
|
|
161
|
+
keywords_retain_order = self.keywords_retain_order,
|
|
162
|
+
sort_key = self.sort_key,
|
|
163
|
+
keywords_abbreviation_number = self.keywords_abbreviation_number,
|
|
164
|
+
delimiter = self.delimiter)
|
|
137
165
|
self.df = ac.init_dataframe()
|
|
138
166
|
if self.autoclass_flag in {"accession", "a", "accesion-generic", "ag"}:
|
|
139
167
|
self.df = self.df.drop('Archive_Reference', axis=1)
|
|
@@ -188,19 +216,25 @@ class OpexManifestGenerator():
|
|
|
188
216
|
print('Error Looking up XIP Metadata')
|
|
189
217
|
print(e)
|
|
190
218
|
|
|
191
|
-
def remove_df_lookup(self, path: str, idx: pd.Index):
|
|
219
|
+
def remove_df_lookup(self, path: str, removed_list: list, idx: pd.Index):
|
|
192
220
|
try:
|
|
193
221
|
if idx.empty:
|
|
194
222
|
return False
|
|
195
223
|
else:
|
|
196
224
|
remove = check_nan(self.df[REMOVAL_FIELD].loc[idx].item())
|
|
197
|
-
if remove:
|
|
225
|
+
if remove is not None:
|
|
226
|
+
removed_list.append(path)
|
|
198
227
|
print(f"Removing: {path}")
|
|
199
|
-
# Not functioning correctly
|
|
200
228
|
if os.path.isdir(path):
|
|
229
|
+
for dp,d,f in os.walk(path):
|
|
230
|
+
for fn in f:
|
|
231
|
+
removed_list.append(win_256_check(dp+win_path_delimiter()+fn))
|
|
232
|
+
for dn in d:
|
|
233
|
+
removed_list.append(win_256_check(dp+win_path_delimiter()+dn))
|
|
201
234
|
shutil.rmtree(path)
|
|
202
235
|
else:
|
|
203
|
-
os.
|
|
236
|
+
if os.path.exists(path):
|
|
237
|
+
os.remove(path)
|
|
204
238
|
return True
|
|
205
239
|
else:
|
|
206
240
|
return False
|
|
@@ -285,7 +319,7 @@ class OpexManifestGenerator():
|
|
|
285
319
|
xml_file = ET.parse(path)
|
|
286
320
|
root_element = ET.QName(xml_file.find('.'))
|
|
287
321
|
root_element_ln = root_element.localname
|
|
288
|
-
root_element_ns = root_element.namespace
|
|
322
|
+
#root_element_ns = root_element.namespace
|
|
289
323
|
elements_list = []
|
|
290
324
|
for elem in xml_file.findall('.//'):
|
|
291
325
|
elem_path = xml_file.getelementpath(elem)
|
|
@@ -309,7 +343,6 @@ class OpexManifestGenerator():
|
|
|
309
343
|
"""
|
|
310
344
|
Composes the data into an xml file.
|
|
311
345
|
"""
|
|
312
|
-
print(self.metadata_dir)
|
|
313
346
|
for xml_file in self.xml_files:
|
|
314
347
|
list_xml = xml_file.get('data')
|
|
315
348
|
localname = xml_file.get('localname')
|
|
@@ -408,6 +441,9 @@ class OpexManifestGenerator():
|
|
|
408
441
|
if self.algorithm:
|
|
409
442
|
output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Fixities", output_format = "txt")
|
|
410
443
|
export_list_txt(self.list_fixity, output_path)
|
|
444
|
+
if self.remove_flag:
|
|
445
|
+
output_path = define_output_file(self.output_path, self.root, self.meta_dir_flag, output_suffix = "_Removed", output_format = "txt")
|
|
446
|
+
export_list_txt(self.remove_list, output_path)
|
|
411
447
|
print_running_time(self.start_time)
|
|
412
448
|
|
|
413
449
|
class OpexDir(OpexManifestGenerator):
|
|
@@ -419,6 +455,7 @@ class OpexDir(OpexManifestGenerator):
|
|
|
419
455
|
self.folder_path = folder_path.replace(u'\\\\?\\', "")
|
|
420
456
|
else:
|
|
421
457
|
self.folder_path = folder_path
|
|
458
|
+
print(self.folder_path)
|
|
422
459
|
if any([self.OMG.input,
|
|
423
460
|
self.OMG.autoclass_flag in {"c","catalog","a","accession","b","both","cg","catalog-generic","ag","accession-generic","bg","both-generic"},
|
|
424
461
|
self.OMG.ignore_flag,
|
|
@@ -432,19 +469,17 @@ class OpexDir(OpexManifestGenerator):
|
|
|
432
469
|
index = None
|
|
433
470
|
else:
|
|
434
471
|
index = None
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
self.removal = False
|
|
447
|
-
|
|
472
|
+
self.ignore = False
|
|
473
|
+
self.removal = False
|
|
474
|
+
if self.OMG.ignore_flag:
|
|
475
|
+
self.ignore = self.OMG.ignore_df_lookup(index)
|
|
476
|
+
if self.ignore:
|
|
477
|
+
return
|
|
478
|
+
if self.OMG.remove_flag:
|
|
479
|
+
self.removal = self.OMG.remove_df_lookup(self.folder_path, self.OMG.remove_list, index)
|
|
480
|
+
if self.removal:
|
|
481
|
+
return
|
|
482
|
+
print(self.removal)
|
|
448
483
|
self.xmlroot = ET.Element(f"{{{self.opexns}}}OPEXMetadata", nsmap={"opex":self.opexns})
|
|
449
484
|
self.transfer = ET.SubElement(self.xmlroot, f"{{{self.opexns}}}Transfer")
|
|
450
485
|
self.manifest = ET.SubElement(self.transfer, f"{{{self.opexns}}}Manifest")
|
|
@@ -491,23 +526,29 @@ class OpexDir(OpexManifestGenerator):
|
|
|
491
526
|
print('Failed to Filter')
|
|
492
527
|
print(e)
|
|
493
528
|
raise SystemError()
|
|
494
|
-
|
|
495
529
|
|
|
496
530
|
def generate_opex_dirs(self, path: str):
|
|
497
531
|
self = OpexDir(self.OMG, path)
|
|
498
532
|
opex_path = os.path.join(os.path.abspath(self.folder_path), os.path.basename(self.folder_path))
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
if
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
533
|
+
if self.removal is True:
|
|
534
|
+
pass
|
|
535
|
+
else:
|
|
536
|
+
for f_path in self.filter_directories(path):
|
|
537
|
+
if f_path.endswith('.opex'):
|
|
538
|
+
pass
|
|
539
|
+
elif os.path.isdir(f_path):
|
|
540
|
+
if self.ignore is True:
|
|
541
|
+
pass
|
|
542
|
+
else:
|
|
543
|
+
self.folder = ET.SubElement(self.folders, f"{{{self.opexns}}}Folder")
|
|
544
|
+
self.folder.text = str(os.path.basename(f_path))
|
|
545
|
+
self.generate_opex_dirs(f_path)
|
|
546
|
+
else:
|
|
547
|
+
OpexFile(self.OMG, f_path, self.OMG.algorithm)
|
|
548
|
+
if self.removal is True or self.ignore is True:
|
|
549
|
+
pass
|
|
550
|
+
else:
|
|
551
|
+
if check_opex(opex_path):
|
|
511
552
|
for f_path in self.filter_directories(path):
|
|
512
553
|
if os.path.isfile(f_path):
|
|
513
554
|
file = ET.SubElement(self.files, f"{{{self.opexns}}}File")
|
|
@@ -518,8 +559,8 @@ class OpexDir(OpexManifestGenerator):
|
|
|
518
559
|
file.set("size", str(os.path.getsize(f_path)))
|
|
519
560
|
file.text = str(os.path.basename(f_path))
|
|
520
561
|
write_opex(opex_path, self.xmlroot)
|
|
521
|
-
|
|
522
|
-
|
|
562
|
+
else:
|
|
563
|
+
print(f"Avoiding override, Opex exists at: {opex_path}")
|
|
523
564
|
|
|
524
565
|
class OpexFile(OpexManifestGenerator):
|
|
525
566
|
def __init__(self, OMG: OpexManifestGenerator, file_path: str, algorithm: str = None, title: str = None, description: str = None, security: str = None):
|
|
@@ -543,17 +584,16 @@ class OpexFile(OpexManifestGenerator):
|
|
|
543
584
|
index = None
|
|
544
585
|
else:
|
|
545
586
|
index = None
|
|
587
|
+
self.ignore = False
|
|
588
|
+
self.removal = False
|
|
546
589
|
if self.OMG.ignore_flag:
|
|
547
590
|
self.ignore = self.OMG.ignore_df_lookup(index)
|
|
548
591
|
if self.ignore:
|
|
549
|
-
|
|
550
|
-
return
|
|
592
|
+
return
|
|
551
593
|
if self.OMG.remove_flag:
|
|
552
|
-
removal = self.OMG.remove_df_lookup(self.file_path, index)
|
|
553
|
-
if removal:
|
|
554
|
-
return
|
|
555
|
-
else:
|
|
556
|
-
self.ignore = False
|
|
594
|
+
self.removal = self.OMG.remove_df_lookup(self.file_path, self.OMG.remove_list, index)
|
|
595
|
+
if self.removal:
|
|
596
|
+
return
|
|
557
597
|
self.algorithm = algorithm
|
|
558
598
|
if self.OMG.title_flag or self.OMG.description_flag or self.OMG.security_flag:
|
|
559
599
|
self.title, self.description, self.security = self.OMG.xip_df_lookup(index)
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
[options]
|
|
2
|
-
|
|
3
|
-
INDEX_FIELD = FullName
|
|
4
|
-
TITLE_FIELD = Title
|
|
5
|
-
DESCRIPTION_FIELD = Description
|
|
6
|
-
SECUIRTY_FIELD = Security
|
|
7
|
-
IDENTIFIER_FIELD = Identifier
|
|
8
|
-
IDENTIFIER_DEFAULT = code
|
|
9
|
-
REMOVAL_FIELD = Removals
|
|
10
|
-
IGNORE_FIELD = Ignore
|
|
11
|
-
SOURCEID_FIELD = SourceID
|
|
12
|
-
HASH_FIELD = Hash
|
|
1
|
+
[options]
|
|
2
|
+
|
|
3
|
+
INDEX_FIELD = FullName
|
|
4
|
+
TITLE_FIELD = Title
|
|
5
|
+
DESCRIPTION_FIELD = Description
|
|
6
|
+
SECUIRTY_FIELD = Security
|
|
7
|
+
IDENTIFIER_FIELD = Identifier
|
|
8
|
+
IDENTIFIER_DEFAULT = code
|
|
9
|
+
REMOVAL_FIELD = Removals
|
|
10
|
+
IGNORE_FIELD = Ignore
|
|
11
|
+
SOURCEID_FIELD = SourceID
|
|
12
|
+
HASH_FIELD = Hash
|
|
13
13
|
ALGORITHM_FIELD = Algorithm
|