libis-format 0.9.32 → 0.9.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data/types.yml +30 -16
- data/lib/libis/format/config.rb +7 -18
- data/lib/libis/format/converter/image_converter.rb +6 -0
- data/lib/libis/format/droid.rb +82 -25
- data/lib/libis/format/extension_identification.rb +55 -0
- data/lib/libis/format/fido.rb +57 -72
- data/lib/libis/format/file_tool.rb +76 -0
- data/lib/libis/format/identification_tool.rb +174 -0
- data/lib/libis/format/identifier.rb +129 -117
- data/lib/libis/format/type_database.rb +36 -5
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format.rb +3 -0
- data/libis-format.gemspec +2 -1
- data/spec/converter_spec.rb +6 -4
- data/spec/identifier_spec.rb +125 -34
- metadata +21 -126
- data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
- data/tools/droid/container-signature-20170330.xml +0 -3584
- data/tools/droid/droid-command-line-6.3.jar +0 -0
- data/tools/droid/droid.bat +0 -152
- data/tools/droid/droid.sh +0 -152
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.10.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/droid-container-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-help-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-results-6.3.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.13.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -50
- data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
- data/tools/fido/conf/container-signature-20170330.xml +0 -3584
- data/tools/fido/conf/dc.xsd +0 -119
- data/tools/fido/conf/dcmitype.xsd +0 -53
- data/tools/fido/conf/dcterms.xsd +0 -383
- data/tools/fido/conf/fido-formats.xsd +0 -173
- data/tools/fido/conf/format_extension_template.xml +0 -105
- data/tools/fido/conf/format_extensions.xml +0 -484
- data/tools/fido/conf/formats-v90.xml +0 -48877
- data/tools/fido/conf/pronom-xml-v90.zip +0 -0
- data/tools/fido/conf/versions.xml +0 -8
- data/tools/fido/fido.bat +0 -4
- data/tools/fido/fido.py +0 -884
- data/tools/fido/fido.sh +0 -5
- data/tools/fido/package.py +0 -96
- data/tools/fido/prepare.py +0 -645
- data/tools/fido/pronomutils.py +0 -200
- data/tools/fido/toxml.py +0 -60
- data/tools/fido/update_signatures.py +0 -183
data/tools/fido/pronomutils.py
DELETED
|
@@ -1,200 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
|
-
"""
|
|
4
|
-
PRONOM UTILS.
|
|
5
|
-
|
|
6
|
-
PYTHON FUNCTION TO QUERY PRONOM VERSION
|
|
7
|
-
AND DOWNLOAD SIGNATUREFILE
|
|
8
|
-
USES PRONOM SOAP SERVICE
|
|
9
|
-
|
|
10
|
-
Open Planets Foundation (http://www.openplanetsfoundation.org)
|
|
11
|
-
See License.txt for license information.
|
|
12
|
-
Download from: http://github.com/openplanets/fido/downloads
|
|
13
|
-
Author: Maurice de Rooij (OPF/NANETH), 2012
|
|
14
|
-
|
|
15
|
-
PRONOM UTILS is a library used by FIDO.
|
|
16
|
-
FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
|
17
|
-
PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
from __future__ import absolute_import
|
|
21
|
-
|
|
22
|
-
import os
|
|
23
|
-
import re
|
|
24
|
-
import sys
|
|
25
|
-
from xml.etree import ElementTree as ET
|
|
26
|
-
from xml.etree.ElementTree import parse, ParseError
|
|
27
|
-
from xml.parsers.expat import ExpatError, ParserCreate
|
|
28
|
-
|
|
29
|
-
import six
|
|
30
|
-
from six.moves import http_client
|
|
31
|
-
|
|
32
|
-
from . import __version__, CONFIG_DIR
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def check_well_formedness(filename, error=False):
    """
    Check whether a given file contains well-formed XML.

    :param filename: path of the file from which the XML is read.
    :param error: pass anything other than ``False`` to have the parser
        error written to ``stderr`` when the document is rejected.
    :returns: ``True`` when expat parses the whole file, ``False`` when it
        raises ``ExpatError``.
    """
    parser = ParserCreate()
    try:
        # Binary mode on purpose: expat handles the document encoding itself
        # and, on Python 3, ParseFile() requires read() to return bytes.
        # The context manager guarantees the handle is released.
        with open(filename, "rb") as stream:
            parser.ParseFile(stream)
    except ExpatError as e:
        if error is not False:
            sys.stderr.write("check_well_formedness: %s: %s;\n" % (filename, e))
        return False
    return True
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def get_pronom_signature(type_):
    """
    Query the PRONOM SOAP service for the latest signature information.

    :param type_: ``"version"`` to fetch the latest signature file version
        number, ``"file"`` to fetch the latest signature XML document.
    :returns: the version number as ``int`` for ``"version"``; for
        ``"file"`` the signature document as a byte string (``str`` on
        Python 2, ``bytes`` on Python 3) prefixed with an XML declaration,
        suitable for writing to a file opened in binary mode. Upon any
        error a message is written to ``stderr`` and ``False`` is returned.
    """
    host = "www.nationalarchives.gov.uk"
    namespace = "http://pronom.nationalarchives.gov.uk"
    # Maps the accepted ``type_`` values to the SOAP operation to invoke.
    operations = {
        "version": "getSignatureFileVersionV1",
        "file": "getSignatureFileV1",
    }
    try:
        if type_ not in operations:
            sys.stderr.write("get_pronom_signature(): unknown type: " + str(type_))
            return False
        operation = operations[type_]
        envelope = (
            '<?xml version="1.0" encoding="utf-8"?>'
            '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
            ' xmlns:xsd="http://www.w3.org/2001/XMLSchema"'
            ' xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
            '<soap:Body><{op} xmlns="{ns}" /></soap:Body>'
            '</soap:Envelope>'
        ).format(op=operation, ns=namespace).encode("utf-8")
        headers = {
            "User-Agent": "PRONOM UTILS v{0} (OPF)".format(__version__),
            "Content-type": "text/xml; charset=\"UTF-8\"",
            "SOAPAction": '"{ns}:{op}In"'.format(ns=namespace, op=operation),
        }

        # HTTPConnection is available under both Python 2 (httplib) and
        # Python 3 (http.client); Host and Content-Length are added by
        # request() itself.
        connection = http_client.HTTPConnection(host)
        try:
            try:
                connection.request("POST", "/pronom/service.asmx", envelope, headers)
                response = connection.getresponse()
            except Exception as e:
                sys.stderr.write("get_pronom_signature(): failed to contact PRONOM;\n%s\n" % (e))
                return False
            statuscode = response.status
            statusmessage = response.reason
            payload = response.read()
        finally:
            connection.close()

        if statuscode != 200:
            sys.stderr.write("get_pronom_signature(): webservice error: '" + str(statuscode) + " " + statusmessage + "'\n")
            return False

        # The payload is bytes, so the patterns are byte patterns as well.
        if type_ == "version":
            found = re.search(br"<Version>([0-9]{1,4})</Version>", payload)
            if found is None:
                sys.stderr.write("get_pronom_signature(): could not parse VERSION from SOAP response: " + type_)
                return False
            return int(found.group(1))

        found = re.search(br"<SignatureFile>.*</SignatureFile>", payload)
        if found is None:
            sys.stderr.write("get_pronom_signature(): could not parse XML from SOAP response: " + type_)
            return False
        document = b'<?xml version="1.0" encoding="UTF-8"?>' + b"\n" + found.group(0)
        # Validate in memory so that no scratch file has to be created.
        try:
            ParserCreate().Parse(document, True)
        except ExpatError:
            sys.stderr.write("get_pronom_signature(): signaturefile not well formed")
            return False
        return document
    except Exception as e:
        sys.stderr.write("get_pronom_signature(): unknown error: " + str(e))
        return False
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class LocalPronomVersions(object):
    """
    Parse local PRONOM signature versions XML file.

    This is how the XML document should look like:

        <?xml version="1.0" encoding="UTF-8"?>
        <versions>
            <pronomVersion>84</pronomVersion>
            <pronomSignature>formats-v84.xml</pronomSignature>
            <pronomContainerSignature>container-signature-20160121.xml</pronomContainerSignature>
            <fidoExtensionSignature>format_extensions.xml</fidoExtensionSignature>
            <updateScript>1.2.2</updateScript>
        </versions>

    Each key of ``PROPS_MAPPING`` is exposed as an instance attribute whose
    value is the text of the corresponding child element of the root.
    """

    # Python attribute name -> XML element name under the root element.
    PROPS_MAPPING = {
        'pronom_version': 'pronomVersion',
        'pronom_signature': 'pronomSignature',
        'pronom_container_signature': 'pronomContainerSignature',
        'fido_extension_signature': 'fidoExtensionSignature',
        'update_script': 'updateScript',
    }

    # Tag of the root element created when no usable file exists yet.
    ROOT_ELEMENT = 'versions'

    def __init__(self, versions_file):
        """
        Instantiate class based on the file indicated in `versions_file`.

        When the file cannot be read or parsed, an empty ``<versions>``
        document is used instead.
        """
        self.versions_file = versions_file
        self.conf_dir = os.path.abspath(os.path.dirname(versions_file))
        try:
            self.tree = parse(versions_file)
            self.root = self.tree.getroot()
        except (ParseError, IOError):
            self.root = ET.Element(self.ROOT_ELEMENT)
            self.tree = ET.ElementTree(self.root)

    def __getattr__(self, name):
        """
        Extract the element's text content.

        Only invoked for names not found through normal lookup.

        :raises AttributeError: when `name` is not a mapped property, or
            when its element is absent from the document.
        """
        if name in self.PROPS_MAPPING:
            element = self.root.find(self.PROPS_MAPPING[name])
            if element is None:
                raise AttributeError(
                    "{0!r} is not defined in {1}".format(name, self.versions_file))
            return element.text
        # Unknown names must fail like on any other object, otherwise
        # hasattr() and getattr() defaults silently see ``None``.
        raise AttributeError(
            "{0!r} object has no attribute {1!r}".format(type(self).__name__, name))

    def __setattr__(self, name, value):
        """Update the element's text content, creating the element if needed."""
        if name not in self.PROPS_MAPPING:
            object.__setattr__(self, name, value)
            return
        tag = self.PROPS_MAPPING[name]
        element = self.root.find(tag)
        if element is None:
            element = ET.SubElement(self.root, tag)
        element.text = value

    def get_zip_file(self):
        """Obtain location to the PRONOM XML Zip file based on the current PRONOM version."""
        return os.path.join(self.conf_dir, 'pronom-xml-v{}.zip'.format(self.pronom_version))

    def get_signature_file(self):
        """Obtain location to the current PRONOM signature file."""
        return os.path.join(self.conf_dir, self.pronom_signature)

    def write(self):
        """
        Update versions.xml.

        :raises ValueError: when any mapped field has no element yet.
        """
        # Check that all the fields are defined
        for key, tag in self.PROPS_MAPPING.items():
            if self.root.find(tag) is None:
                raise ValueError('Field {} has not been defined!'.format(key))
        self.tree.write(self.versions_file, xml_declaration=True, method='xml', encoding='utf-8')
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
def get_local_pronom_versions(config_dir=CONFIG_DIR):
    """
    Build a LocalPronomVersions for the `versions.xml` inside `config_dir`.

    :param config_dir: directory expected to hold `versions.xml`.
    :returns: the LocalPronomVersions instance for that file.
    """
    versions_path = os.path.join(config_dir, 'versions.xml')
    return LocalPronomVersions(versions_path)
|
data/tools/fido/toxml.py
DELETED
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
|
|
4
|
-
"""
|
|
5
|
-
FIDO CSV output to XML.
|
|
6
|
-
|
|
7
|
-
Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, September 2011
|
|
8
|
-
|
|
9
|
-
Usage in combination with FIDO:
|
|
10
|
-
- Windows: python fido.py [ARGS] | python toxml.py > output.xml
|
|
11
|
-
- Linux: fido.py [ARGS] | toxml.py > output.xml
|
|
12
|
-
|
|
13
|
-
Usage afterwards:
|
|
14
|
-
- Windows: type output.csv | toxml.py > output.xml
|
|
15
|
-
- Linux: cat output.csv | toxml.py > output.xml
|
|
16
|
-
|
|
17
|
-
For difference in usage, see:
|
|
18
|
-
- http://bugs.python.org/issue9390
|
|
19
|
-
- http://support.microsoft.com/default.aspx?kbid=321788
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
from __future__ import absolute_import
|
|
23
|
-
|
|
24
|
-
import csv
|
|
25
|
-
import sys
|
|
26
|
-
|
|
27
|
-
from . import __version__
|
|
28
|
-
from .pronomutils import get_local_pronom_versions
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def main():
    """
    Generate XML as read from CSV and send it to the standard output stream.

    CSV rows are read from ``stdin``. Every non-empty row becomes one
    ``<file>`` element; the columns are mapped by position as follows:
    0 -> status, 1 -> time, 2 -> puid, 3 -> formatname, 4 -> signaturename,
    5 -> filesize, 6 -> filename, 7 -> mimetype, 8 -> matchtype.

    :raises IndexError: when a non-empty row has fewer than nine columns.
    """
    # Local import: keeps this function self-contained.
    from xml.sax.saxutils import escape

    sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
    <versions>
        <fido_version>{0}</fido_version>
        <signature_version>{1}</signature_version>
    </versions>""".format(__version__, get_local_pronom_versions().pronom_version))

    reader = csv.reader(sys.stdin)

    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank input line.
            continue
        # File names and format names may contain '&', '<' or '>'; they
        # must be escaped or the resulting document is not well formed.
        cells = [escape(cell) for cell in row]
        sys.stdout.write("""
    <file>
        <filename>{0}</filename>
        <status>{1}</status>
        <matchtype>{2}</matchtype>
        <time>{3}</time>
        <puid>{4}</puid>
        <mimetype>{5}</mimetype>
        <formatname>{6}</formatname>
        <signaturename>{7}</signaturename>
        <filesize>{8}</filesize>
    </file>""".format(cells[6], cells[0], cells[8], cells[1], cells[2], cells[7], cells[3], cells[4], cells[5]))

    sys.stdout.write("\n</fido_output>\n")


if __name__ == '__main__':
    main()
|
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
|
|
4
|
-
"""
|
|
5
|
-
FIDO SIGNATURE UPDATER.
|
|
6
|
-
|
|
7
|
-
Open Planets Foundation (http://www.openplanetsfoundation.org)
|
|
8
|
-
See License.txt for license information.
|
|
9
|
-
Download from: https://github.com/openplanets/fido/releases
|
|
10
|
-
Author: Maurice de Rooij (NANETH), 2012
|
|
11
|
-
|
|
12
|
-
FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
|
13
|
-
PRONOM is available from http://www.nationalarchives.gov.uk/pronom/.
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
from __future__ import print_function
|
|
17
|
-
|
|
18
|
-
from argparse import ArgumentParser
|
|
19
|
-
import os
|
|
20
|
-
from shutil import rmtree
|
|
21
|
-
import sys
|
|
22
|
-
import time
|
|
23
|
-
from xml.etree import ElementTree as CET
|
|
24
|
-
import zipfile
|
|
25
|
-
|
|
26
|
-
from six.moves.urllib.request import urlopen
|
|
27
|
-
|
|
28
|
-
from . import __version__, CONFIG_DIR, query_yes_no
|
|
29
|
-
from .prepare import run as prepare_pronom_to_fido
|
|
30
|
-
from .pronomutils import check_well_formedness, get_local_pronom_versions, get_pronom_signature
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
# File-name settings; the '{0}' placeholders are filled with the PRONOM
# signature version number by run().
defaults = {
    'signatureFileName': 'DROID_SignatureFile-v{0}.xml',
    'pronomZipFileName': 'pronom-xml-v{0}.zip',
    'fidoSignatureVersion': 'format_extensions.xml',
    'containerVersion': 'container-signature-20160121.xml',  # container version is frozen and needs human attention before updating,
}

# Runtime settings; main() uses these as the defaults of its command line
# arguments.
options = {
    'http_throttle': 0.5,  # in secs, to prevent DoS of PRONOM server
    'tmp_dir': os.path.join(CONFIG_DIR, 'tmp'),
    'deleteTempDirectory': True,
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def _puid_file_name(puid):
    """Return the local file name used to store the XML of `puid` (e.g. 'fmt/1')."""
    puidType, puidNum = puid.split("/")
    return "puid.{}.{}.xml".format(puidType, puidNum)


def run(defaults=defaults):
    """
    Update PRONOM signatures.

    Interactive script, requires keyboard input.

    :param defaults: mapping with the file-name settings and, optionally,
        the runtime settings ``tmp_dir``, ``deleteTempDirectory`` and
        ``http_throttle``. Runtime settings that are absent fall back to
        the module-level ``options``.

    Exits the process through ``sys.exit`` when the user aborts, when
    PRONOM cannot be queried, or when the temporary folder is unusable.
    """
    # Start from the runtime options so that a plain run() call, which
    # passes only the file-name settings, still finds every key it reads.
    settings = dict(options)
    settings.update(defaults)

    print("FIDO signature updater v{}".format(__version__))

    try:
        print("Contacting PRONOM...")
        currentVersion = get_pronom_signature("version")
        if not currentVersion:
            sys.exit('Failed to obtain PRONOM signature file version number, please try again.')

        print("Querying latest signaturefile version...")
        signatureFileName = settings['signatureFileName'].format(currentVersion)
        signatureFile = os.path.join(CONFIG_DIR, signatureFileName)
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version", currentVersion)
            if not query_yes_no("Update anyway?"):
                sys.exit('Aborting update...')

        print("Downloading signature file version {}...".format(currentVersion))
        currentFile = get_pronom_signature("file")
        if not currentFile:
            sys.exit('Failed to obtain PRONOM signature file, please try again.')
        print("Writing {0}...".format(signatureFileName))
        if not isinstance(currentFile, bytes):
            # The file is opened in binary mode, so text must be encoded first.
            currentFile = currentFile.encode('utf-8')
        with open(signatureFile, 'wb') as file_:
            file_.write(currentFile)

        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        puids = [
            node.get("PUID")
            for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat")
        ]
        numberPuids = len(puids)
        print("Found {} PRONOM PUID's".format(numberPuids))

        print("Downloading signatures can take a while")
        if not query_yes_no("Continue and download signatures?"):
            sys.exit('Aborting update...')
        tmpdir = settings['tmp_dir']
        # Only a pre-existing folder can be resumed from.
        resume_download = False
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download:", tmpdir)
            resume_download = query_yes_no('Do you want to resume download (yes) or start over (no)?')
            if resume_download:
                print("Resuming download...")
        else:
            print("Creating temporary folder for download:", tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # The outcome is verified right below.
                pass
        if not os.path.isdir(tmpdir):
            sys.exit("Failed to create temporary folder for PUID's: {0}".format(tmpdir))

        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        for puid in puids:
            filename = os.path.join(tmpdir, _puid_file_name(puid))
            # Files from an earlier run are reused only when resuming.
            if resume_download and os.path.isfile(filename) and check_well_formedness(filename):
                numfiles += 1
                continue
            puid_url = "http://www.nationalarchives.gov.uk/pronom/{}.xml".format(puid)
            try:
                filehandle = urlopen(puid_url)
            except Exception as e:
                print("Failed to download signature file:", puid_url)
                print("Error:", str(e))
                sys.exit('Please restart and resume download.')
            try:
                with open(filename, 'wb') as file_:
                    file_.write(filehandle.read())
            finally:
                filehandle.close()
            if not check_well_formedness(filename):
                # Drop the broken download; it is simply left out of the zip.
                os.unlink(filename)
                continue
            numfiles += 1
            percent = int(float(numfiles) / one_percent)
            print(r"{}/{} files [{}%]".format(numfiles, numberPuids, percent))
            time.sleep(settings['http_throttle'])
        print("100%")

        print("Creating PRONOM zip...")
        compression = zipfile.ZIP_DEFLATED if 'zlib' in sys.modules else zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        zipFileName = os.path.join(CONFIG_DIR, settings['pronomZipFileName'].format(currentVersion))
        print("Adding files with compression mode", modes[compression])
        # The context manager finalises the archive even when adding fails.
        with zipfile.ZipFile(zipFileName, mode='w') as zf:
            for puid in puids:
                puidFileName = _puid_file_name(puid)
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    if settings['deleteTempDirectory']:
                        os.unlink(filename)

        if settings['deleteTempDirectory']:
            print("Deleting temporary folder and files...")
            rmtree(tmpdir, ignore_errors=True)

        print('Updating versions.xml...')
        versions = get_local_pronom_versions()
        versions.pronom_version = str(currentVersion)
        versions.pronom_signature = "formats-v" + str(currentVersion) + ".xml"
        versions.pronom_container_signature = settings['containerVersion']
        versions.fido_extension_signature = settings['fidoSignatureVersion']
        versions.update_script = __version__
        versions.write()

        # TODO: there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        prepare_pronom_to_fido()
        print("FIDO signatures successfully updated")

    except KeyboardInterrupt:
        sys.exit('Aborting update...')
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
def main():
    """
    Command line entry point of the signature updater.

    Parses `-tmpdir`, `-keep_tmp` and `-http_throttle`, layers the parsed
    values over the module-level `defaults` and hands the result to `run`.
    """
    cli = ArgumentParser(description='Download and convert the latest PRONOM signatures')
    cli.add_argument(
        '-tmpdir',
        dest='tmp_dir',
        default=options['tmp_dir'],
        help='Location to store temporary files',
    )
    cli.add_argument(
        '-keep_tmp',
        dest='deleteTempDirectory',
        action='store_false',
        default=options['deleteTempDirectory'],
        help='Do not delete temporary files after completion',
    )
    cli.add_argument(
        '-http_throttle',
        dest='http_throttle',
        type=float,
        default=options['http_throttle'],
        help='Time (in seconds) to wait between downloads',
    )
    parsed = vars(cli.parse_args())

    # Parsed command line values take precedence over the defaults.
    merged = dict(defaults)
    merged.update(parsed)
    run(merged)


if __name__ == '__main__':
    main()
|