libis-format 0.9.32 → 0.9.33
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/data/types.yml +30 -16
- data/lib/libis/format/config.rb +7 -18
- data/lib/libis/format/converter/image_converter.rb +6 -0
- data/lib/libis/format/droid.rb +82 -25
- data/lib/libis/format/extension_identification.rb +55 -0
- data/lib/libis/format/fido.rb +57 -72
- data/lib/libis/format/file_tool.rb +76 -0
- data/lib/libis/format/identification_tool.rb +174 -0
- data/lib/libis/format/identifier.rb +129 -117
- data/lib/libis/format/type_database.rb +36 -5
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format.rb +3 -0
- data/libis-format.gemspec +2 -1
- data/spec/converter_spec.rb +6 -4
- data/spec/identifier_spec.rb +125 -34
- metadata +21 -126
- data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
- data/tools/droid/container-signature-20170330.xml +0 -3584
- data/tools/droid/droid-command-line-6.3.jar +0 -0
- data/tools/droid/droid.bat +0 -152
- data/tools/droid/droid.sh +0 -152
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.10.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/droid-container-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-help-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-results-6.3.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.13.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -50
- data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
- data/tools/fido/conf/container-signature-20170330.xml +0 -3584
- data/tools/fido/conf/dc.xsd +0 -119
- data/tools/fido/conf/dcmitype.xsd +0 -53
- data/tools/fido/conf/dcterms.xsd +0 -383
- data/tools/fido/conf/fido-formats.xsd +0 -173
- data/tools/fido/conf/format_extension_template.xml +0 -105
- data/tools/fido/conf/format_extensions.xml +0 -484
- data/tools/fido/conf/formats-v90.xml +0 -48877
- data/tools/fido/conf/pronom-xml-v90.zip +0 -0
- data/tools/fido/conf/versions.xml +0 -8
- data/tools/fido/fido.bat +0 -4
- data/tools/fido/fido.py +0 -884
- data/tools/fido/fido.sh +0 -5
- data/tools/fido/package.py +0 -96
- data/tools/fido/prepare.py +0 -645
- data/tools/fido/pronomutils.py +0 -200
- data/tools/fido/toxml.py +0 -60
- data/tools/fido/update_signatures.py +0 -183
data/tools/fido/pronomutils.py
DELETED
@@ -1,200 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
"""
|
4
|
-
PRONOM UTILS.
|
5
|
-
|
6
|
-
PYTHON FUNCTION TO QUERY PRONOM VERSION
|
7
|
-
AND DOWNLOAD SIGNATUREFILE
|
8
|
-
USES PRONOM SOAP SERVICE
|
9
|
-
|
10
|
-
Open Planets Foundation (http://www.openplanetsfoundation.org)
|
11
|
-
See License.txt for license information.
|
12
|
-
Download from: http://github.com/openplanets/fido/downloads
|
13
|
-
Author: Maurice de Rooij (OPF/NANETH), 2012
|
14
|
-
|
15
|
-
PRONOM UTILS is a library used by FIDO.
|
16
|
-
FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
17
|
-
PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
|
18
|
-
"""
|
19
|
-
|
20
|
-
from __future__ import absolute_import
|
21
|
-
|
22
|
-
import os
|
23
|
-
import re
|
24
|
-
import sys
|
25
|
-
from xml.etree import ElementTree as ET
|
26
|
-
from xml.etree.ElementTree import parse, ParseError
|
27
|
-
from xml.parsers.expat import ExpatError, ParserCreate
|
28
|
-
|
29
|
-
import six
|
30
|
-
from six.moves import http_client
|
31
|
-
|
32
|
-
from . import __version__, CONFIG_DIR
|
33
|
-
|
34
|
-
|
35
|
-
def check_well_formedness(filename, error=False):
    """
    Check if a given file contains well-formed XML.

    :param filename: file from which the XML is read.
    :param error: whether or not to print to `stderr` upon a parse error.
    :returns: `True` when the file parses as XML, `False` otherwise.
    """
    parser = ParserCreate()
    try:
        # expat consumes raw bytes and applies the encoding declared in the
        # document itself, so the file is opened in binary mode. The context
        # manager closes the handle even when parsing fails.
        with open(filename, "rb") as handle:
            parser.ParseFile(handle)
    except ExpatError as e:
        if error is not False:
            sys.stderr.write("check_well_formedness: %s: %s;\n" % (filename, e))
        return False
    return True
|
51
|
-
|
52
|
-
|
53
|
-
def get_pronom_signature(type_):
    """
    Query the PRONOM SOAP service.

    :param type_: ``"version"`` to ask for the latest signature file version
        number, ``"file"`` to download the latest signature file.
    :returns: the version number as `int` for ``"version"``; the signature
        XML document (XML declaration included) as a byte string for
        ``"file"``; `False` upon any error, after a message has been written
        to `stderr`.
    """
    host = "www.nationalarchives.gov.uk"
    operations = {
        "version": "getSignatureFileVersionV1",
        "file": "getSignatureFileV1",
    }
    envelope = (
        '<?xml version="1.0" encoding="utf-8"?>'
        '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
        ' xmlns:xsd="http://www.w3.org/2001/XMLSchema"'
        ' xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
        '<soap:Body><{0} xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body>'
        '</soap:Envelope>'
    )
    try:
        operation = operations.get(type_)
        if operation is None:
            sys.stderr.write("get_pronom_signature(): unknown type: " + type_)
            return False

        body = envelope.format(operation).encode("utf-8")
        headers = {
            "User-Agent": "PRONOM UTILS v{0} (OPF)".format(__version__),
            "Content-type": "text/xml; charset=\"UTF-8\"",
            # The SOAPAction value is sent with its surrounding double quotes.
            "SOAPAction": '"http://pronom.nationalarchives.gov.uk:{0}In"'.format(operation),
        }

        # HTTPConnection exists on both Python 2 (httplib) and Python 3
        # (http.client); it adds the Host and Content-Length headers itself.
        connection = http_client.HTTPConnection(host)
        try:
            try:
                connection.request("POST", "/pronom/service.asmx", body, headers)
            except Exception as e:
                sys.stderr.write("get_pronom_signature(): failed to contact PRONOM;\n%s\n" % (e))
                # Report failure through the documented return value instead
                # of terminating the interpreter with a success exit status.
                return False
            response = connection.getresponse()
            if response.status != 200:
                sys.stderr.write("get_pronom_signature(): webservice error: '" + str(response.status) + " " + response.reason + "'\n")
                return False
            payload = response.read()
        finally:
            connection.close()

        # The response body is a byte string, so the patterns are bytes too.
        if type_ == "version":
            found = re.search(br"<Version>([0-9]{1,4})</Version>", payload)
            if found is None:
                sys.stderr.write("get_pronom_signature(): could not parse VERSION from SOAP response: " + type_)
                return False
            return int(found.group(1))

        # type_ == "file". DOTALL lets the match span line breaks.
        found = re.search(br"<SignatureFile>.*</SignatureFile>", payload, re.DOTALL)
        if found is None:
            sys.stderr.write("get_pronom_signature(): could not parse XML from SOAP response: " + type_)
            return False
        document = b'<?xml version="1.0" encoding="UTF-8"?>' + b"\n" + found.group(0)
        try:
            # Validate in memory; no scratch file in the working directory.
            ParserCreate().Parse(document, True)
        except ExpatError:
            sys.stderr.write("get_pronom_signature(): signaturefile not well formed")
            return False
        return document
    except Exception as e:
        # Boundary handler: callers only ever see a value or False.
        sys.stderr.write("get_pronom_signature(): unknown error: " + str(e))
        return False
|
126
|
-
|
127
|
-
|
128
|
-
class LocalPronomVersions(object):
    """
    Parse local PRONOM signature versions XML file.

    This is how the XML document should look like:

        <?xml version="1.0" encoding="UTF-8"?>
        <versions>
            <pronomVersion>84</pronomVersion>
            <pronomSignature>formats-v84.xml</pronomSignature>
            <pronomContainerSignature>container-signature-20160121.xml</pronomContainerSignature>
            <fidoExtensionSignature>format_extensions.xml</fidoExtensionSignature>
            <updateScript>1.2.2</updateScript>
        </versions>

    Each key of `PROPS_MAPPING` is exposed as an attribute whose value is the
    text of the matching child element of the root.
    """

    # attribute name -> tag of the child element holding its value
    PROPS_MAPPING = {
        'pronom_version': 'pronomVersion',
        'pronom_signature': 'pronomSignature',
        'pronom_container_signature': 'pronomContainerSignature',
        'fido_extension_signature': 'fidoExtensionSignature',
        'update_script': 'updateScript',
    }

    ROOT_ELEMENT = 'versions'

    def __init__(self, versions_file):
        """
        Instantiate class based on the file indicated in `versions_file`.

        When the file cannot be read or parsed, start from an empty
        `<versions>` document instead of failing.
        """
        self.versions_file = versions_file
        self.conf_dir = os.path.abspath(os.path.dirname(versions_file))
        try:
            self.tree = parse(versions_file)
            self.root = self.tree.getroot()
        except (ParseError, IOError):
            self.root = ET.Element(self.ROOT_ELEMENT)
            self.tree = ET.ElementTree(self.root)

    def __getattr__(self, name):
        """
        Extract the element's text content.

        Only called when regular attribute lookup fails.

        :raises AttributeError: for names outside `PROPS_MAPPING` and for
            mapped names whose element is absent from the document.
        """
        if name in self.PROPS_MAPPING:
            element = self.root.find(self.PROPS_MAPPING[name])
            if element is not None:
                return element.text
        # Raising (rather than returning None) keeps hasattr()/getattr() with
        # a default working as usual.
        raise AttributeError(
            "{0!r} object has no attribute {1!r}".format(type(self).__name__, name))

    def __setattr__(self, name, value):
        """Update the element's text content, creating the element if needed."""
        if name not in self.PROPS_MAPPING:
            object.__setattr__(self, name, value)
            return
        tag = self.PROPS_MAPPING[name]
        element = self.root.find(tag)
        if element is None:
            element = ET.SubElement(self.root, tag)
        element.text = value

    def get_zip_file(self):
        """Obtain location to the PRONOM XML Zip file based on the current PRONOM version."""
        return os.path.join(self.conf_dir, 'pronom-xml-v{}.zip'.format(self.pronom_version))

    def get_signature_file(self):
        """Obtain location to the current PRONOM signature file."""
        return os.path.join(self.conf_dir, self.pronom_signature)

    def write(self):
        """
        Update versions.xml.

        :raises ValueError: when any element listed in `PROPS_MAPPING` is
            missing from the document.
        """
        # Check that all the fields are defined
        for key, tag in self.PROPS_MAPPING.items():
            if self.root.find(tag) is None:
                raise ValueError('Field {} has not been defined!'.format(key))
        self.tree.write(self.versions_file, xml_declaration=True, method='xml', encoding='utf-8')
|
196
|
-
|
197
|
-
|
198
|
-
def get_local_pronom_versions(config_dir=CONFIG_DIR):
    """Build a `LocalPronomVersions` from the `versions.xml` file inside `config_dir`."""
    versions_path = os.path.join(config_dir, 'versions.xml')
    return LocalPronomVersions(versions_path)
|
data/tools/fido/toxml.py
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
"""
|
5
|
-
FIDO CSV output to XML.
|
6
|
-
|
7
|
-
Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, September 2011
|
8
|
-
|
9
|
-
Usage in combination with FIDO:
|
10
|
-
- Windows: python fido.py [ARGS] | python toxml.py > output.xml
|
11
|
-
- Linux: fido.py [ARGS] | toxml.py > output.xml
|
12
|
-
|
13
|
-
Usage afterwards:
|
14
|
-
- Windows: type output.csv | toxml.py > output.xml
|
15
|
-
- Linux: cat output.csv | toxml.py > output.xml
|
16
|
-
|
17
|
-
For difference in usage, see:
|
18
|
-
- http://bugs.python.org/issue9390
|
19
|
-
- http://support.microsoft.com/default.aspx?kbid=321788
|
20
|
-
"""
|
21
|
-
|
22
|
-
from __future__ import absolute_import
|
23
|
-
|
24
|
-
import csv
|
25
|
-
import sys
|
26
|
-
|
27
|
-
from . import __version__
|
28
|
-
from .pronomutils import get_local_pronom_versions
|
29
|
-
|
30
|
-
|
31
|
-
def main():
    """
    Generate XML as read from CSV and send it to the standard output stream.

    CSV rows are read from standard input. Blank rows are skipped, and every
    value is XML-escaped before being written.
    """
    # Imported here because the rest of the module has no use for it.
    from xml.sax.saxutils import escape

    # Index of the CSV column feeding each element, in output order:
    # filename, status, matchtype, time, puid, mimetype, formatname,
    # signaturename, filesize.
    column_order = (6, 0, 8, 1, 2, 7, 3, 4, 5)
    file_template = """
    <file>
        <filename>{0}</filename>
        <status>{1}</status>
        <matchtype>{2}</matchtype>
        <time>{3}</time>
        <puid>{4}</puid>
        <mimetype>{5}</mimetype>
        <formatname>{6}</formatname>
        <signaturename>{7}</signaturename>
        <filesize>{8}</filesize>
    </file>"""

    sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
    <versions>
        <fido_version>{0}</fido_version>
        <signature_version>{1}</signature_version>
    </versions>""".format(__version__, get_local_pronom_versions().pronom_version))

    for row in csv.reader(sys.stdin):
        if not row:
            # csv.reader yields an empty list for a blank input line.
            continue
        # Escape &, < and > so values such as file names cannot break the XML.
        values = [escape(row[index]) for index in column_order]
        sys.stdout.write(file_template.format(*values))

    sys.stdout.write("\n</fido_output>\n")
|
57
|
-
|
58
|
-
|
59
|
-
if __name__ == '__main__':
|
60
|
-
main()
|
@@ -1,183 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
"""
|
5
|
-
FIDO SIGNATURE UPDATER.
|
6
|
-
|
7
|
-
Open Planets Foundation (http://www.openplanetsfoundation.org)
|
8
|
-
See License.txt for license information.
|
9
|
-
Download from: https://github.com/openplanets/fido/releases
|
10
|
-
Author: Maurice de Rooij (NANETH), 2012
|
11
|
-
|
12
|
-
FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
13
|
-
PRONOM is available from http://www.nationalarchives.gov.uk/pronom/.
|
14
|
-
"""
|
15
|
-
|
16
|
-
from __future__ import print_function
|
17
|
-
|
18
|
-
from argparse import ArgumentParser
|
19
|
-
import os
|
20
|
-
from shutil import rmtree
|
21
|
-
import sys
|
22
|
-
import time
|
23
|
-
from xml.etree import ElementTree as CET
|
24
|
-
import zipfile
|
25
|
-
|
26
|
-
from six.moves.urllib.request import urlopen
|
27
|
-
|
28
|
-
from . import __version__, CONFIG_DIR, query_yes_no
|
29
|
-
from .prepare import run as prepare_pronom_to_fido
|
30
|
-
from .pronomutils import check_well_formedness, get_local_pronom_versions, get_pronom_signature
|
31
|
-
|
32
|
-
|
33
|
-
# File-name templates and fixed signature names used by the updater.
defaults = dict(
    signatureFileName='DROID_SignatureFile-v{0}.xml',
    pronomZipFileName='pronom-xml-v{0}.zip',
    fidoSignatureVersion='format_extensions.xml',
    # container version is frozen and needs human attention before updating
    containerVersion='container-signature-20160121.xml',
)

# Run-time settings; also the default values of the command-line options.
options = dict(
    http_throttle=0.5,  # in secs, to prevent DoS of PRONOM server
    tmp_dir=os.path.join(CONFIG_DIR, 'tmp'),
    deleteTempDirectory=True,
)
|
45
|
-
|
46
|
-
|
47
|
-
def run(defaults=defaults):
    """
    Update PRONOM signatures.

    Interactive script, requires keyboard input.

    :param defaults: dict of settings. Keys read here: ``signatureFileName``,
        ``pronomZipFileName``, ``containerVersion``, ``fidoSignatureVersion``,
        ``tmp_dir``, ``deleteTempDirectory`` and ``http_throttle``. Keys that
        are absent fall back to the module-level ``options``.
    """
    # The module-level `defaults` dict has no 'tmp_dir', 'deleteTempDirectory'
    # or 'http_throttle' entry, so layer the caller's values over `options`
    # to keep a bare run() call from failing with KeyError.
    settings = dict(options)
    settings.update(defaults)

    print("FIDO signature updater v{}".format(__version__))

    try:
        print("Contacting PRONOM...")
        currentVersion = get_pronom_signature("version")
        if not currentVersion:
            sys.exit('Failed to obtain PRONOM signature file version number, please try again.')

        print("Querying latest signaturefile version...")
        signatureFileName = settings['signatureFileName'].format(currentVersion)
        signatureFile = os.path.join(CONFIG_DIR, signatureFileName)
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version", currentVersion)
            if not query_yes_no("Update anyway?"):
                sys.exit('Aborting update...')

        print("Downloading signature file version {}...".format(currentVersion))
        currentFile = get_pronom_signature("file")
        if not currentFile:
            sys.exit('Failed to obtain PRONOM signature file, please try again.')
        print("Writing {0}...".format(signatureFileName))
        # The file is opened in binary mode, so text must be encoded first.
        if not isinstance(currentFile, bytes):
            currentFile = currentFile.encode('utf-8')
        with open(signatureFile, 'wb') as file_:
            file_.write(currentFile)

        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        format_tag = "{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat"
        # Nodes without a PUID attribute cannot be downloaded; leave them out.
        puids = [node.get("PUID") for node in tree.iter(format_tag) if node.get("PUID")]
        numberPuids = len(puids)
        print("Found {} PRONOM PUID's".format(numberPuids))

        print("Downloading signatures can take a while")
        if not query_yes_no("Continue and download signatures?"):
            sys.exit('Aborting update...')
        tmpdir = settings['tmp_dir']
        # Stays False unless an existing folder is found and the user opts in.
        resume_download = False
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download:", tmpdir)
            resume_download = query_yes_no('Do you want to resume download (yes) or start over (no)?')
            if resume_download:
                print("Resuming download...")
        else:
            print("Creating temporary folder for download:", tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # Reported by the isdir() check right below.
                pass
        if not os.path.isdir(tmpdir):
            sys.exit("Failed to create temporary folder for PUID's: {}".format(tmpdir))

        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        for puid in puids:
            puidType, puidNum = puid.split("/")
            puidFileName = "puid." + puidType + "." + puidNum + ".xml"
            filename = os.path.join(tmpdir, puidFileName)
            if resume_download and os.path.isfile(filename) and check_well_formedness(filename):
                numfiles += 1
                continue
            puid_url = "http://www.nationalarchives.gov.uk/pronom/{}.xml".format(puid)
            try:
                filehandle = urlopen(puid_url)
            except Exception as e:
                print("Failed to download signature file:", puid_url)
                print("Error:", str(e))
                sys.exit('Please restart and resume download.')
            try:
                with open(filename, 'wb') as file_:
                    file_.write(filehandle.read())
            finally:
                filehandle.close()
            if not check_well_formedness(filename):
                # Drop the malformed download and move on to the next PUID.
                os.unlink(filename)
                continue
            numfiles += 1
            percent = int(float(numfiles) / one_percent)
            print(r"{}/{} files [{}%]".format(numfiles, numberPuids, percent))
            time.sleep(settings['http_throttle'])
        print("100%")

        print("Creating PRONOM zip...")
        compression = zipfile.ZIP_DEFLATED if 'zlib' in sys.modules else zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        zipFileName = os.path.join(CONFIG_DIR, settings['pronomZipFileName'].format(currentVersion))
        # The context manager closes the archive even if adding a file fails.
        with zipfile.ZipFile(zipFileName, mode='w') as zf:
            print("Adding files with compression mode", modes[compression])
            for puid in puids:
                puidType, puidNum = puid.split("/")
                puidFileName = "puid.{}.{}.xml".format(puidType, puidNum)
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    if settings['deleteTempDirectory']:
                        os.unlink(filename)

        if settings['deleteTempDirectory']:
            print("Deleting temporary folder and files...")
            rmtree(tmpdir, ignore_errors=True)

        print('Updating versions.xml...')
        versions = get_local_pronom_versions()
        versions.pronom_version = str(currentVersion)
        versions.pronom_signature = "formats-v" + str(currentVersion) + ".xml"
        versions.pronom_container_signature = settings['containerVersion']
        versions.fido_extension_signature = settings['fidoSignatureVersion']
        versions.update_script = __version__
        versions.write()

        # TODO: there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        prepare_pronom_to_fido()
        print("FIDO signatures successfully updated")

    except KeyboardInterrupt:
        sys.exit('Aborting update...')
|
167
|
-
|
168
|
-
|
169
|
-
def main():
    """Parse the command line, overlay it on a copy of `defaults` and call `run`."""
    cli = ArgumentParser(description='Download and convert the latest PRONOM signatures')
    cli.add_argument(
        '-tmpdir',
        dest='tmp_dir',
        default=options['tmp_dir'],
        help='Location to store temporary files',
    )
    cli.add_argument(
        '-keep_tmp',
        dest='deleteTempDirectory',
        action='store_false',
        default=options['deleteTempDirectory'],
        help='Do not delete temporary files after completion',
    )
    cli.add_argument(
        '-http_throttle',
        dest='http_throttle',
        type=float,
        default=options['http_throttle'],
        help='Time (in seconds) to wait between downloads',
    )
    parsed = vars(cli.parse_args())

    settings = defaults.copy()
    settings.update(parsed)

    run(settings)
|
180
|
-
|
181
|
-
|
182
|
-
if __name__ == '__main__':
|
183
|
-
main()
|