libis-format 0.9.30 → 0.9.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/libis/format/converter/image_converter.rb +2 -2
- data/lib/libis/format/office_to_pdf.rb +1 -1
- data/lib/libis/format/version.rb +1 -1
- data/spec/converter_spec.rb +43 -27
- data/spec/data/test-options.png +0 -0
- data/spec/data/test.pdf.tif +0 -0
- data/tools/droid/{DROID_SignatureFile_V82.xml → DROID_SignatureFile_V90.xml} +8202 -701
- data/tools/droid/{container-signature-20150307.xml → container-signature-20170330.xml} +3584 -2235
- data/tools/droid/droid-command-line-6.3.jar +0 -0
- data/tools/droid/droid.bat +152 -154
- data/tools/droid/droid.sh +30 -16
- data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
- data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
- data/tools/droid/lib/commons-codec-1.10.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
- data/tools/droid/lib/droid-container-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/{droid-help-6.1.5.jar → droid-help-6.3.jar} +0 -0
- data/tools/droid/lib/droid-report-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-results-6.3.jar +0 -0
- data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
- data/tools/droid/lib/poi-3.13.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
- data/tools/fido/__init__.py +50 -0
- data/tools/fido/conf/DROID_SignatureFile-v90.xml +2 -0
- data/tools/fido/conf/{container-signature-20150307.xml → container-signature-20170330.xml} +1487 -141
- data/tools/fido/conf/format_extensions.xml +0 -14
- data/tools/fido/conf/{formats-v81.xml → formats-v90.xml} +11409 -887
- data/tools/fido/conf/{pronom-xml-v81.zip → pronom-xml-v90.zip} +0 -0
- data/tools/fido/conf/versions.xml +6 -6
- data/tools/fido/fido.py +437 -407
- data/tools/fido/package.py +96 -0
- data/tools/fido/prepare.py +217 -188
- data/tools/fido/pronomutils.py +143 -58
- data/tools/fido/toxml.py +54 -46
- data/tools/fido/update_signatures.py +139 -127
- metadata +34 -40
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/fido/argparselocal.py +0 -2355
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +0 -2
data/tools/fido/pronomutils.py
CHANGED
|
@@ -1,115 +1,200 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
PRONOM UTILS.
|
|
5
|
+
|
|
6
|
+
PYTHON FUNCTION TO QUERY PRONOM VERSION
|
|
7
|
+
AND DOWNLOAD SIGNATUREFILE
|
|
8
|
+
USES PRONOM SOAP SERVICE
|
|
9
|
+
|
|
10
|
+
Open Planets Foundation (http://www.openplanetsfoundation.org)
|
|
11
|
+
See License.txt for license information.
|
|
12
|
+
Download from: http://github.com/openplanets/fido/downloads
|
|
13
|
+
Author: Maurice de Rooij (OPF/NANETH), 2012
|
|
14
|
+
|
|
15
|
+
PRONOM UTILS is a library used by FIDO.
|
|
16
|
+
FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
|
17
|
+
PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import absolute_import
|
|
21
|
+
|
|
22
|
+
import os
|
|
23
|
+
import re
|
|
18
24
|
import sys
|
|
19
|
-
from xml.
|
|
20
|
-
|
|
25
|
+
from xml.etree import ElementTree as ET
|
|
26
|
+
from xml.etree.ElementTree import parse, ParseError
|
|
27
|
+
from xml.parsers.expat import ExpatError, ParserCreate
|
|
28
|
+
|
|
29
|
+
import six
|
|
30
|
+
from six.moves import http_client
|
|
31
|
+
|
|
32
|
+
from . import __version__, CONFIG_DIR
|
|
21
33
|
|
|
22
|
-
|
|
34
|
+
|
|
35
|
+
def check_well_formedness(filename, error=False):
|
|
23
36
|
"""
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
37
|
+
Check if a given file contains valid XML.
|
|
38
|
+
|
|
39
|
+
:param filename: file from which the XML is read.
|
|
40
|
+
:param error: whether or not to print to `stderr` upon error.
|
|
41
|
+
:returns: whether the file contains valid XML.
|
|
28
42
|
"""
|
|
29
|
-
|
|
30
|
-
parser = xml.parsers.expat.ParserCreate()
|
|
43
|
+
parser = ParserCreate()
|
|
31
44
|
try:
|
|
32
45
|
parser.ParseFile(open(filename, "r"))
|
|
33
|
-
except
|
|
46
|
+
except ExpatError as e:
|
|
34
47
|
if error is not False:
|
|
35
|
-
sys.stderr.write("
|
|
48
|
+
sys.stderr.write("check_well_formedness: %s: %s;\n" % (filename, e))
|
|
36
49
|
return False
|
|
37
50
|
return True
|
|
38
51
|
|
|
39
|
-
|
|
52
|
+
|
|
53
|
+
def get_pronom_signature(type_):
|
|
40
54
|
"""
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
55
|
+
Get PRONOM signature.
|
|
56
|
+
|
|
57
|
+
Return latest signature file version number as int when `type_` equals
|
|
58
|
+
"version" or return latest signature XML file as string when `type_` equals
|
|
59
|
+
"file". Upon error, write to `stderr` and return `False`.
|
|
46
60
|
"""
|
|
47
61
|
try:
|
|
48
|
-
import httplib
|
|
49
|
-
import re
|
|
50
|
-
import os
|
|
51
62
|
soapVersionContainer = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getSignatureFileVersionV1 xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>"""
|
|
52
63
|
soapFileContainer = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getSignatureFileV1 xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>"""
|
|
53
64
|
soapVersionHeader = """\"http://pronom.nationalarchives.gov.uk:getSignatureFileVersionV1In\""""
|
|
54
65
|
soapFileHeader = """\"http://pronom.nationalarchives.gov.uk:getSignatureFileV1In\""""
|
|
55
|
-
if
|
|
66
|
+
if type_ == "version":
|
|
56
67
|
soapAction = soapVersionHeader
|
|
57
68
|
soapStr = soapVersionContainer
|
|
58
|
-
elif
|
|
69
|
+
elif type_ == "file":
|
|
59
70
|
soapAction = soapFileHeader
|
|
60
71
|
soapStr = soapFileContainer
|
|
61
72
|
else:
|
|
62
|
-
sys.stderr.write("
|
|
73
|
+
sys.stderr.write("get_pronom_signature(): unknown type: " + type_)
|
|
63
74
|
return False
|
|
64
|
-
webservice =
|
|
75
|
+
webservice = http_client.HTTP("www.nationalarchives.gov.uk")
|
|
65
76
|
webservice.putrequest("POST", "/pronom/service.asmx")
|
|
66
77
|
webservice.putheader("Host", "www.nationalarchives.gov.uk")
|
|
67
|
-
webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(
|
|
78
|
+
webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__version__))
|
|
68
79
|
webservice.putheader("Content-type", "text/xml; charset=\"UTF-8\"")
|
|
69
80
|
webservice.putheader("Content-length", "%d" % len(soapStr))
|
|
70
81
|
webservice.putheader("SOAPAction", soapAction)
|
|
71
82
|
try:
|
|
72
83
|
webservice.endheaders()
|
|
73
|
-
except Exception
|
|
74
|
-
sys.stderr.write("
|
|
84
|
+
except Exception as e:
|
|
85
|
+
sys.stderr.write("get_pronom_signature(): failed to contact PRONOM;\n%s\n" % (e))
|
|
75
86
|
sys.exit()
|
|
76
87
|
webservice.send(soapStr)
|
|
77
88
|
statuscode, statusmessage, header = webservice.getreply()
|
|
78
89
|
if statuscode == 200:
|
|
79
90
|
xml = webservice.getfile()
|
|
80
|
-
if
|
|
91
|
+
if type_ == "version":
|
|
81
92
|
exp = re.compile("\<Version\>([0-9]{1,4})\<\/Version\>")
|
|
82
93
|
sigxml = exp.search(xml.read())
|
|
83
94
|
if len(sigxml.group(1)) > 0:
|
|
84
95
|
return int(sigxml.group(1))
|
|
85
96
|
else:
|
|
86
|
-
sys.stderr.write("
|
|
97
|
+
sys.stderr.write("get_pronom_signature(): could not parse VERSION from SOAP response: " + type_)
|
|
87
98
|
return False
|
|
88
|
-
if
|
|
99
|
+
if type_ == "file":
|
|
89
100
|
exp = re.compile("\<SignatureFile\>.*\<\/SignatureFile\>")
|
|
90
101
|
sigxml = exp.search(xml.read())
|
|
91
102
|
sigtxt = sigxml.group(0) if sigxml else ''
|
|
92
103
|
if len(sigtxt) > 0:
|
|
93
104
|
tmpfile = "./tmp_getPronomSignature.xml"
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
if not checkWellFormedness(tmpfile):
|
|
105
|
+
with open(tmpfile, 'wb') as file_:
|
|
106
|
+
file_.write("""<?xml version="1.0" encoding="UTF-8"?>""" + "\n")
|
|
107
|
+
file_.write(sigtxt)
|
|
108
|
+
if not check_well_formedness(tmpfile):
|
|
99
109
|
os.unlink(tmpfile)
|
|
100
|
-
sys.stderr.write("
|
|
110
|
+
sys.stderr.write("get_pronom_signature(): signaturefile not well formed")
|
|
101
111
|
return False
|
|
102
112
|
else:
|
|
103
113
|
os.unlink(tmpfile)
|
|
104
|
-
return """<?xml version="1.0" encoding="UTF-8"?>"""+"\n"+sigtxt
|
|
114
|
+
return """<?xml version="1.0" encoding="UTF-8"?>""" + "\n" + sigtxt
|
|
105
115
|
else:
|
|
106
|
-
sys.stderr.write("
|
|
116
|
+
sys.stderr.write("get_pronom_signature(): could not parse XML from SOAP response: " + type_)
|
|
107
117
|
return False
|
|
108
118
|
else:
|
|
109
|
-
sys.stderr.write("
|
|
119
|
+
sys.stderr.write("get_pronom_signature(): webservice error: '" + str(statuscode) + " " + statusmessage + "'\n")
|
|
110
120
|
return False
|
|
111
|
-
|
|
121
|
+
sys.stderr.write("get_pronom_signature(): unexpected return")
|
|
112
122
|
return False
|
|
113
|
-
except Exception
|
|
114
|
-
|
|
123
|
+
except Exception as e:
|
|
124
|
+
sys.stderr.write("get_pronom_signature(): unknown error: " + str(e))
|
|
115
125
|
return False
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class LocalPronomVersions(object):
|
|
129
|
+
"""
|
|
130
|
+
Parse local PRONOM signature versions XML file.
|
|
131
|
+
|
|
132
|
+
This is what the XML document should look like:
|
|
133
|
+
|
|
134
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
135
|
+
<versions>
|
|
136
|
+
<pronomVersion>84</pronomVersion>
|
|
137
|
+
<pronomSignature>formats-v84.xml</pronomSignature>
|
|
138
|
+
<pronomContainerSignature>container-signature-20160121.xml</pronomContainerSignature>
|
|
139
|
+
<fidoExtensionSignature>format_extensions.xml</fidoExtensionSignature>
|
|
140
|
+
<updateScript>1.2.2</updateScript>
|
|
141
|
+
</versions>
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
PROPS_MAPPING = {
|
|
145
|
+
'pronom_version': 'pronomVersion',
|
|
146
|
+
'pronom_signature': 'pronomSignature',
|
|
147
|
+
'pronom_container_signature': 'pronomContainerSignature',
|
|
148
|
+
'fido_extension_signature': 'fidoExtensionSignature',
|
|
149
|
+
'update_script': 'updateScript',
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
ROOT_ELEMENT = 'versions'
|
|
153
|
+
|
|
154
|
+
def __init__(self, versions_file):
|
|
155
|
+
"""Instantiate class based on the file indicated in `versions_file`."""
|
|
156
|
+
self.versions_file = versions_file
|
|
157
|
+
self.conf_dir = os.path.abspath(os.path.dirname(versions_file))
|
|
158
|
+
try:
|
|
159
|
+
self.tree = parse(versions_file)
|
|
160
|
+
self.root = self.tree.getroot()
|
|
161
|
+
except (ParseError, IOError):
|
|
162
|
+
self.root = ET.Element(self.ROOT_ELEMENT)
|
|
163
|
+
self.tree = ET.ElementTree(self.root)
|
|
164
|
+
|
|
165
|
+
def __getattr__(self, name):
|
|
166
|
+
"""Extract the element's text content."""
|
|
167
|
+
if name in self.PROPS_MAPPING:
|
|
168
|
+
return self.root.find(self.PROPS_MAPPING[name]).text
|
|
169
|
+
|
|
170
|
+
def __setattr__(self, name, value):
|
|
171
|
+
"""Update the element's text content."""
|
|
172
|
+
if name in self.PROPS_MAPPING:
|
|
173
|
+
try:
|
|
174
|
+
self.root.find(self.PROPS_MAPPING[name]).text = value
|
|
175
|
+
except AttributeError:
|
|
176
|
+
elem = ET.SubElement(self.root, self.PROPS_MAPPING[name])
|
|
177
|
+
elem.text = value
|
|
178
|
+
else:
|
|
179
|
+
object.__setattr__(self, name, value)
|
|
180
|
+
|
|
181
|
+
def get_zip_file(self):
|
|
182
|
+
"""Obtain location to the PRONOM XML Zip file based on the current PRONOM version."""
|
|
183
|
+
return os.path.join(self.conf_dir, 'pronom-xml-v{}.zip'.format(self.pronom_version))
|
|
184
|
+
|
|
185
|
+
def get_signature_file(self):
|
|
186
|
+
"""Obtain location to the current PRONOM signature file."""
|
|
187
|
+
return os.path.join(self.conf_dir, self.pronom_signature)
|
|
188
|
+
|
|
189
|
+
def write(self):
|
|
190
|
+
"""Update versions.xml."""
|
|
191
|
+
# Check that all the fields are defined
|
|
192
|
+
for key, value in six.iteritems(self.PROPS_MAPPING):
|
|
193
|
+
if self.root.find(value) is None:
|
|
194
|
+
raise ValueError('Field {} has not been defined!'.format(key))
|
|
195
|
+
self.tree.write(self.versions_file, xml_declaration=True, method='xml', encoding='utf-8')
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def get_local_pronom_versions(config_dir=CONFIG_DIR):
|
|
199
|
+
"""Return an instance of LocalPronomVersions loaded with `conf/versions.xml`."""
|
|
200
|
+
return LocalPronomVersions(os.path.join(config_dir, 'versions.xml'))
|
data/tools/fido/toxml.py
CHANGED
|
@@ -1,52 +1,60 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
-
#
|
|
4
|
-
# FIDO csv output to XML
|
|
5
|
-
# Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, september 2011
|
|
6
|
-
#
|
|
7
|
-
# Usage in combination with FIDO:
|
|
8
|
-
# Windows: python fido.py [ARGS] | python toxml.py > output.xml
|
|
9
|
-
# Linux: fido.py [ARGS] | toxml.py > output.xml
|
|
10
|
-
#
|
|
11
|
-
# Usage afterwards:
|
|
12
|
-
# Windows: type output.csv | toxml.py > output.xml
|
|
13
|
-
# Linux: cat output.csv | toxml.py > output.xml
|
|
14
|
-
#
|
|
15
|
-
# for difference in usage, see:
|
|
16
|
-
# http://bugs.python.org/issue9390
|
|
17
|
-
# http://support.microsoft.com/default.aspx?kbid=321788
|
|
18
|
-
#
|
|
19
3
|
|
|
20
|
-
|
|
4
|
+
"""
|
|
5
|
+
FIDO CSV output to XML.
|
|
6
|
+
|
|
7
|
+
Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, September 2011
|
|
8
|
+
|
|
9
|
+
Usage in combination with FIDO:
|
|
10
|
+
- Windows: python fido.py [ARGS] | python toxml.py > output.xml
|
|
11
|
+
- Linux: fido.py [ARGS] | toxml.py > output.xml
|
|
12
|
+
|
|
13
|
+
Usage afterwards:
|
|
14
|
+
- Windows: type output.csv | toxml.py > output.xml
|
|
15
|
+
- Linux: cat output.csv | toxml.py > output.xml
|
|
16
|
+
|
|
17
|
+
For difference in usage, see:
|
|
18
|
+
- http://bugs.python.org/issue9390
|
|
19
|
+
- http://support.microsoft.com/default.aspx?kbid=321788
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import absolute_import
|
|
23
|
+
|
|
21
24
|
import csv
|
|
22
|
-
import
|
|
25
|
+
import sys
|
|
26
|
+
|
|
27
|
+
from . import __version__
|
|
28
|
+
from .pronomutils import get_local_pronom_versions
|
|
23
29
|
|
|
24
|
-
# define FIDO version
|
|
25
|
-
fidoVersion = '1.0'
|
|
26
|
-
# define PRONOM signature version
|
|
27
|
-
signatureVersion = '56'
|
|
28
30
|
|
|
29
|
-
|
|
31
|
+
def main():
|
|
32
|
+
"""Generate XML as read from CSV and send it to the standard output stream."""
|
|
33
|
+
sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
|
|
30
34
|
<fido_output>
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
reader = csv.reader(sys.stdin)
|
|
37
|
-
|
|
38
|
-
for row in reader:
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
sys.stdout.write("\n</fido_output>\n")
|
|
35
|
+
<versions>
|
|
36
|
+
<fido_version>{0}</fido_version>
|
|
37
|
+
<signature_version>{1}</signature_version>
|
|
38
|
+
</versions>""".format(__version__, get_local_pronom_versions().pronom_version))
|
|
39
|
+
|
|
40
|
+
reader = csv.reader(sys.stdin)
|
|
41
|
+
|
|
42
|
+
for row in reader:
|
|
43
|
+
sys.stdout.write("""
|
|
44
|
+
<file>
|
|
45
|
+
<filename>{0}</filename>
|
|
46
|
+
<status>{1}</status>
|
|
47
|
+
<matchtype>{2}</matchtype>
|
|
48
|
+
<time>{3}</time>
|
|
49
|
+
<puid>{4}</puid>
|
|
50
|
+
<mimetype>{5}</mimetype>
|
|
51
|
+
<formatname>{6}</formatname>
|
|
52
|
+
<signaturename>{7}</signaturename>
|
|
53
|
+
<filesize>{8}</filesize>
|
|
54
|
+
</file>""".format(row[6], row[0], row[8], row[1], row[2], row[7], row[3], row[4], row[5]))
|
|
55
|
+
|
|
56
|
+
sys.stdout.write("\n</fido_output>\n")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
if __name__ == '__main__':
|
|
60
|
+
main()
|
|
@@ -1,171 +1,183 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
-
#
|
|
4
|
-
# FIDO SIGNATURE UPDATER
|
|
5
|
-
#
|
|
6
|
-
# Open Planets Foundation (http://www.openplanetsfoundation.org)
|
|
7
|
-
# See License.txt for license information.
|
|
8
|
-
# Download from: https://github.com/openplanets/fido/releases
|
|
9
|
-
# Author: Maurice de Rooij (NANETH), 2012
|
|
10
|
-
#
|
|
11
|
-
# FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions .
|
|
12
|
-
# PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
|
|
13
|
-
#
|
|
14
|
-
import sys, os, urllib, time, zipfile, shutil
|
|
15
3
|
|
|
4
|
+
"""
|
|
5
|
+
FIDO SIGNATURE UPDATER.
|
|
6
|
+
|
|
7
|
+
Open Planets Foundation (http://www.openplanetsfoundation.org)
|
|
8
|
+
See License.txt for license information.
|
|
9
|
+
Download from: https://github.com/openplanets/fido/releases
|
|
10
|
+
Author: Maurice de Rooij (NANETH), 2012
|
|
11
|
+
|
|
12
|
+
FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
|
13
|
+
PRONOM is available from http://www.nationalarchives.gov.uk/pronom/.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import print_function
|
|
17
|
+
|
|
18
|
+
from argparse import ArgumentParser
|
|
19
|
+
import os
|
|
20
|
+
from shutil import rmtree
|
|
21
|
+
import sys
|
|
22
|
+
import time
|
|
16
23
|
from xml.etree import ElementTree as CET
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
import
|
|
24
|
+
import zipfile
|
|
25
|
+
|
|
26
|
+
from six.moves.urllib.request import urlopen
|
|
27
|
+
|
|
28
|
+
from . import __version__, CONFIG_DIR, query_yes_no
|
|
29
|
+
from .prepare import run as prepare_pronom_to_fido
|
|
30
|
+
from .pronomutils import check_well_formedness, get_local_pronom_versions, get_pronom_signature
|
|
31
|
+
|
|
20
32
|
|
|
21
33
|
defaults = {
|
|
22
|
-
'
|
|
23
|
-
'
|
|
24
|
-
'
|
|
25
|
-
'
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
'http_throttle'
|
|
30
|
-
'
|
|
31
|
-
'
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
'signatureFileName': 'DROID_SignatureFile-v{0}.xml',
|
|
35
|
+
'pronomZipFileName': 'pronom-xml-v{0}.zip',
|
|
36
|
+
'fidoSignatureVersion': 'format_extensions.xml',
|
|
37
|
+
'containerVersion': 'container-signature-20160121.xml', # container version is frozen and needs human attention before updating,
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
options = {
|
|
41
|
+
'http_throttle': 0.5, # in secs, to prevent DoS of PRONOM server
|
|
42
|
+
'tmp_dir': os.path.join(CONFIG_DIR, 'tmp'),
|
|
43
|
+
'deleteTempDirectory': True,
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def run(defaults=defaults):
|
|
35
48
|
"""
|
|
36
|
-
|
|
37
|
-
|
|
49
|
+
Update PRONOM signatures.
|
|
50
|
+
|
|
51
|
+
Interactive script, requires keyboard input.
|
|
38
52
|
"""
|
|
53
|
+
print("FIDO signature updater v{}".format(__version__))
|
|
54
|
+
|
|
39
55
|
try:
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
print
|
|
46
|
-
|
|
47
|
-
if currentVersion == False:
|
|
48
|
-
print "Failed to obtain PRONOM signature file version number, please try again"
|
|
49
|
-
sys.exit()
|
|
50
|
-
print "Querying latest signaturefile version..."
|
|
51
|
-
signatureFile = os.path.join(os.path.abspath(defaults['conf_dir']), defaults['signatureFileName'].format(currentVersion))
|
|
56
|
+
print("Contacting PRONOM...")
|
|
57
|
+
currentVersion = get_pronom_signature("version")
|
|
58
|
+
if not currentVersion:
|
|
59
|
+
sys.exit('Failed to obtain PRONOM signature file version number, please try again.')
|
|
60
|
+
|
|
61
|
+
print("Querying latest signaturefile version...")
|
|
62
|
+
signatureFile = os.path.join(CONFIG_DIR, defaults['signatureFileName'].format(currentVersion))
|
|
52
63
|
if os.path.isfile(signatureFile):
|
|
53
|
-
print
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
print
|
|
58
|
-
currentFile =
|
|
59
|
-
if currentFile
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
print
|
|
66
|
-
print "Extracting PRONOM PUID's from signature file..."
|
|
64
|
+
print("You already have the latest PRONOM signature file, version", currentVersion)
|
|
65
|
+
if not query_yes_no("Update anyway?"):
|
|
66
|
+
sys.exit('Aborting update...')
|
|
67
|
+
|
|
68
|
+
print("Downloading signature file version {}...".format(currentVersion))
|
|
69
|
+
currentFile = get_pronom_signature("file")
|
|
70
|
+
if not currentFile:
|
|
71
|
+
sys.exit('Failed to obtain PRONOM signature file, please try again.')
|
|
72
|
+
print("Writing {0}...".format(defaults['signatureFileName'].format(currentVersion)))
|
|
73
|
+
with open(signatureFile, 'wb') as file_:
|
|
74
|
+
file_.write(currentFile)
|
|
75
|
+
|
|
76
|
+
print("Extracting PRONOM PUID's from signature file...")
|
|
67
77
|
tree = CET.parse(signatureFile)
|
|
68
78
|
puids = []
|
|
69
79
|
for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat"):
|
|
70
80
|
puids.append(node.get("PUID"))
|
|
71
81
|
numberPuids = len(puids)
|
|
72
|
-
print
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
if
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
tmpdir = os.path.join(os.path.abspath(defaults['conf_dir']), defaults['tmp_dir'])
|
|
82
|
+
print("Found {} PRONOM PUID's".format(numberPuids))
|
|
83
|
+
|
|
84
|
+
print("Downloading signatures can take a while")
|
|
85
|
+
if not query_yes_no("Continue and download signatures?"):
|
|
86
|
+
sys.exit('Aborting update...')
|
|
87
|
+
tmpdir = defaults['tmp_dir']
|
|
79
88
|
if os.path.isdir(tmpdir):
|
|
80
|
-
print
|
|
81
|
-
|
|
82
|
-
if
|
|
83
|
-
print
|
|
84
|
-
resume_download = True
|
|
85
|
-
else:
|
|
86
|
-
resume_download = False
|
|
89
|
+
print("Found previously created temporary folder for download:", tmpdir)
|
|
90
|
+
resume_download = query_yes_no('Do you want to resume download (yes) or start over (no)?')
|
|
91
|
+
if resume_download:
|
|
92
|
+
print("Resuming download...")
|
|
87
93
|
else:
|
|
88
|
-
print
|
|
94
|
+
print("Creating temporary folder for download:", tmpdir)
|
|
89
95
|
try:
|
|
90
96
|
os.mkdir(tmpdir)
|
|
91
97
|
except:
|
|
92
98
|
pass
|
|
93
99
|
if not os.path.isdir(tmpdir):
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
print
|
|
100
|
+
print("Failed to create temporary folder for PUID's, using", tmpdir)
|
|
101
|
+
|
|
102
|
+
print("Downloading signatures, one moment please...")
|
|
97
103
|
one_percent = (float(numberPuids) / 100)
|
|
98
104
|
numfiles = 0
|
|
99
105
|
for puid in puids:
|
|
100
106
|
puidType, puidNum = puid.split("/")
|
|
101
|
-
puidFileName = "puid."+puidType+"."+puidNum+".xml"
|
|
107
|
+
puidFileName = "puid." + puidType + "." + puidNum + ".xml"
|
|
102
108
|
filename = os.path.join(tmpdir, puidFileName)
|
|
103
|
-
if os.path.isfile(filename) and
|
|
109
|
+
if os.path.isfile(filename) and check_well_formedness(filename) and resume_download:
|
|
104
110
|
numfiles += 1
|
|
105
111
|
continue
|
|
106
|
-
|
|
112
|
+
puid_url = "http://www.nationalarchives.gov.uk/pronom/{}.xml".format(puid)
|
|
107
113
|
try:
|
|
108
|
-
filehandle =
|
|
109
|
-
except Exception
|
|
110
|
-
print
|
|
111
|
-
print
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
puidfile.write(lines)
|
|
117
|
-
puidfile.close()
|
|
114
|
+
filehandle = urlopen(puid_url)
|
|
115
|
+
except Exception as e:
|
|
116
|
+
print("Failed to download signature file:", puid_url)
|
|
117
|
+
print("Error:", str(e))
|
|
118
|
+
sys.exit('Please restart and resume download.')
|
|
119
|
+
with open(filename, 'wb') as file_:
|
|
120
|
+
for lines in filehandle.readlines():
|
|
121
|
+
file_.write(lines)
|
|
118
122
|
filehandle.close()
|
|
119
|
-
if not
|
|
123
|
+
if not check_well_formedness(filename):
|
|
120
124
|
os.unlink(filename)
|
|
121
125
|
continue
|
|
122
126
|
numfiles += 1
|
|
123
127
|
percent = int(float(numfiles) / one_percent)
|
|
124
|
-
print "
|
|
125
|
-
print str(percent)+"%",
|
|
128
|
+
print(r"{}/{} files [{}%]".format(numfiles, numberPuids, percent))
|
|
126
129
|
time.sleep(defaults['http_throttle'])
|
|
127
|
-
print
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
except:
|
|
132
|
-
compression = zipfile.ZIP_STORED
|
|
130
|
+
print("100%")
|
|
131
|
+
|
|
132
|
+
print("Creating PRONOM zip...")
|
|
133
|
+
compression = zipfile.ZIP_DEFLATED if 'zlib' in sys.modules else zipfile.ZIP_STORED
|
|
133
134
|
modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
print "adding files with compression mode '"+modes[compression]+"'"
|
|
135
|
+
zf = zipfile.ZipFile(os.path.join(CONFIG_DIR, defaults['pronomZipFileName'].format(currentVersion)), mode='w')
|
|
136
|
+
print("Adding files with compression mode", modes[compression])
|
|
137
137
|
for puid in puids:
|
|
138
138
|
puidType, puidNum = puid.split("/")
|
|
139
|
-
puidFileName = "puid.
|
|
140
|
-
filename = os.path.join(
|
|
139
|
+
puidFileName = "puid.{}.{}.xml".format(puidType, puidNum)
|
|
140
|
+
filename = os.path.join(tmpdir, puidFileName)
|
|
141
141
|
if os.path.isfile(filename):
|
|
142
142
|
zf.write(filename, arcname=puidFileName, compress_type=compression)
|
|
143
|
-
|
|
143
|
+
if defaults['deleteTempDirectory']:
|
|
144
|
+
os.unlink(filename)
|
|
144
145
|
zf.close()
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
print "FIDO signatures successfully updated"
|
|
165
|
-
sys.exit()
|
|
146
|
+
|
|
147
|
+
if defaults['deleteTempDirectory']:
|
|
148
|
+
print("Deleting temporary folder and files...")
|
|
149
|
+
rmtree(tmpdir, ignore_errors=True)
|
|
150
|
+
|
|
151
|
+
print('Updating versions.xml...')
|
|
152
|
+
versions = get_local_pronom_versions()
|
|
153
|
+
versions.pronom_version = str(currentVersion)
|
|
154
|
+
versions.pronom_signature = "formats-v" + str(currentVersion) + ".xml"
|
|
155
|
+
versions.pronom_container_signature = defaults['containerVersion']
|
|
156
|
+
versions.fido_extension_signature = defaults['fidoSignatureVersion']
|
|
157
|
+
versions.update_script = __version__
|
|
158
|
+
versions.write()
|
|
159
|
+
|
|
160
|
+
# TODO: there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
|
|
161
|
+
print("Preparing to convert PRONOM formats to FIDO signatures...")
|
|
162
|
+
prepare_pronom_to_fido()
|
|
163
|
+
print("FIDO signatures successfully updated")
|
|
164
|
+
|
|
166
165
|
except KeyboardInterrupt:
|
|
167
|
-
|
|
168
|
-
|
|
166
|
+
sys.exit('Aborting update...')
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def main():
|
|
170
|
+
"""Main CLI entrypoint."""
|
|
171
|
+
parser = ArgumentParser(description='Download and convert the latest PRONOM signatures')
|
|
172
|
+
parser.add_argument('-tmpdir', default=options['tmp_dir'], help='Location to store temporary files', dest='tmp_dir')
|
|
173
|
+
parser.add_argument('-keep_tmp', default=options['deleteTempDirectory'], help='Do not delete temporary files after completion', dest='deleteTempDirectory', action='store_false')
|
|
174
|
+
parser.add_argument('-http_throttle', default=options['http_throttle'], help='Time (in seconds) to wait between downloads', type=float, dest='http_throttle')
|
|
175
|
+
args = parser.parse_args()
|
|
176
|
+
opts = defaults.copy()
|
|
177
|
+
opts.update(vars(args))
|
|
178
|
+
|
|
179
|
+
run(opts)
|
|
180
|
+
|
|
169
181
|
|
|
170
182
|
if __name__ == '__main__':
|
|
171
|
-
main(
|
|
183
|
+
main()
|