libis-format 0.9.5-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +18 -0
- data/.travis.yml +41 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/bin/pdf_copy +13 -0
- data/data/ISOcoated_v2_eci.icc +0 -0
- data/data/PDFA_def.ps +40 -0
- data/data/ead.xsd +2728 -0
- data/data/eciRGB_v2.icc +0 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +217 -0
- data/lib/libis/format/config.rb +35 -0
- data/lib/libis/format/converter/base.rb +101 -0
- data/lib/libis/format/converter/chain.rb +167 -0
- data/lib/libis/format/converter/image_converter.rb +214 -0
- data/lib/libis/format/converter/office_converter.rb +50 -0
- data/lib/libis/format/converter/pdf_converter.rb +139 -0
- data/lib/libis/format/converter/repository.rb +98 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +45 -0
- data/lib/libis/format/fido.rb +102 -0
- data/lib/libis/format/identifier.rb +189 -0
- data/lib/libis/format/office_to_pdf.rb +52 -0
- data/lib/libis/format/pdf_copy.rb +40 -0
- data/lib/libis/format/pdf_merge.rb +41 -0
- data/lib/libis/format/pdf_split.rb +39 -0
- data/lib/libis/format/pdf_to_pdfa.rb +76 -0
- data/lib/libis/format/pdfa_validator.rb +61 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +23 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +34 -0
- data/spec/converter_spec.rb +212 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test-options.jpg +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.jpg +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.pdf.tif +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/data/test_pdfa.pdf +0 -0
- data/spec/identifier_spec.rb +60 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/PdfTool.jar +0 -0
- data/tools/bcpkix-jdk15on-1.49.jar +0 -0
- data/tools/bcprov-jdk15on-1.49.jar +0 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
- data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
- metadata +396 -0
@@ -0,0 +1,115 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# PRONOM UTILS
|
4
|
+
#
|
5
|
+
# PYTHON FUNCTION TO QUERY PRONOM VERSION
|
6
|
+
# AND DOWNLOAD SIGNATUREFILE
|
7
|
+
# USES PRONOM SOAP SERVICE
|
8
|
+
#
|
9
|
+
# Open Planets Foundation (http://www.openplanetsfoundation.org)
|
10
|
+
# See License.txt for license information.
|
11
|
+
# Download from: http://github.com/openplanets/fido/downloads
|
12
|
+
# Author: Maurice de Rooij (OPF/NANETH), 2012
|
13
|
+
#
|
14
|
+
# PRONOM UTILS is a library used by FIDO
|
15
|
+
# FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
16
|
+
# PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
|
17
|
+
#
|
18
|
+
import sys
|
19
|
+
from xml.dom import minidom
|
20
|
+
__pronomutils__ = {'version' : '1.0.1'}
|
21
|
+
|
22
|
+
def checkWellFormedness(filename,error=False):
    """
    Check whether a file contains well-formed XML.

    usage: checkWellFormedness(filename)
    arguments:
    "filename": path of the file to parse
    "error": whether or not print to stderr upon error
    returns: True if the file could be opened and parsed as XML,
    False otherwise (this includes a missing or unreadable file)
    """
    import xml.parsers.expat
    parser = xml.parsers.expat.ParserCreate()
    try:
        # Binary mode hands expat the raw bytes so it can apply the encoding
        # declared in the document itself; the with-block closes the handle
        # whether or not parsing succeeds.
        with open(filename, "rb") as handle:
            parser.ParseFile(handle)
    except Exception as e:
        if error is not False:
            sys.stderr.write("checkWellFormedness: %s: %s;\n" % (filename, e))
        return False
    return True
|
38
|
+
|
39
|
+
def getPronomSignature(type):
    """
    Query the PRONOM SOAP service for the latest signature file.

    usage: getPronomSignature(version|file)
    arguments:
    "version": returns latest signature file version number as int
    "file": returns latest signature XML file as string
    upon error: writes to stderr and returns False
    note: when the request headers cannot be sent the process is ended
    through sys.exit() instead of returning False
    """
    try:
        import re
        import os
        import tempfile
        soapVersionContainer = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getSignatureFileVersionV1 xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>"""
        soapFileContainer = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getSignatureFileV1 xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>"""
        soapVersionHeader = '"http://pronom.nationalarchives.gov.uk:getSignatureFileVersionV1In"'
        soapFileHeader = '"http://pronom.nationalarchives.gov.uk:getSignatureFileV1In"'
        # Validate the request kind before touching the network library.
        if type == "version":
            soapAction = soapVersionHeader
            soapStr = soapVersionContainer
        elif type == "file":
            soapAction = soapFileHeader
            soapStr = soapFileContainer
        else:
            sys.stderr.write("getPronomSignature(): unknown type: "+type)
            return False
        import httplib
        webservice = httplib.HTTP("apps.nationalarchives.gov.uk")
        webservice.putrequest("POST", "/pronom/service.asmx")
        webservice.putheader("Host", "www.nationalarchives.gov.uk")
        webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__pronomutils__['version']))
        webservice.putheader("Content-type", "text/xml; charset=\"UTF-8\"")
        webservice.putheader("Content-length", "%d" % len(soapStr))
        webservice.putheader("SOAPAction", soapAction)
        try:
            webservice.endheaders()
        except Exception as e:
            sys.stderr.write("getPronomSignature(): failed to contact PRONOM;\n%s\n" % (e))
            # SystemExit is not an Exception subclass, so the outer handler
            # below does not swallow it.
            sys.exit()
        webservice.send(soapStr)
        statuscode, statusmessage, header = webservice.getreply()
        if statuscode != 200:
            sys.stderr.write("getPronomSignature(): webservice error: '"+str(statuscode)+" "+statusmessage+"'\n")
            return False
        response = webservice.getfile().read()
        if type == "version":
            found = re.search(r"\<Version\>([0-9]{1,4})\<\/Version\>", response)
            if found is None:
                sys.stderr.write("getPronomSignature(): could not parse VERSION from SOAP response: "+type)
                return False
            return int(found.group(1))
        # Only type == "file" can reach this point.
        found = re.search(r"\<SignatureFile\>.*\<\/SignatureFile\>", response)
        if found is None:
            sys.stderr.write("getPronomSignature(): could not parse XML from SOAP response: "+type)
            return False
        signature = """<?xml version="1.0" encoding="UTF-8"?>"""+"\n"+found.group(0)
        # checkWellFormedness() works on a path, so the candidate document is
        # written to a private scratch file that is always removed afterwards.
        fd, tmpfile = tempfile.mkstemp(suffix=".xml")
        try:
            with os.fdopen(fd, 'wb') as tmp:
                tmp.write(signature)
            wellFormed = checkWellFormedness(tmpfile)
        finally:
            os.unlink(tmpfile)
        if not wellFormed:
            sys.stderr.write("getPronomSignature(): signaturefile not well formed")
            return False
        return signature
    except Exception as e:
        sys.stderr.write("getPronomSignature(): unknown error: "+str(e))
        return False
|
data/tools/fido/toxml.py
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
#!python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
#
|
4
|
+
# FIDO csv output to XML
|
5
|
+
# Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, september 2011
|
6
|
+
#
|
7
|
+
# Usage in combination with FIDO:
|
8
|
+
# Windows: python fido.py [ARGS] | python toxml.py > output.xml
|
9
|
+
# Linux: fido.py [ARGS] | toxml.py > output.xml
|
10
|
+
#
|
11
|
+
# Usage afterwards:
|
12
|
+
# Windows: type output.csv | toxml.py > output.xml
|
13
|
+
# Linux: cat output.csv | toxml.py > output.xml
|
14
|
+
#
|
15
|
+
# for difference in usage, see:
|
16
|
+
# http://bugs.python.org/issue9390
|
17
|
+
# http://support.microsoft.com/default.aspx?kbid=321788
|
18
|
+
#
|
19
|
+
|
20
|
+
import sys
|
21
|
+
import csv
|
22
|
+
import string
|
23
|
+
|
24
|
+
# define FIDO version
fidoVersion = '1.0'
# define PRONOM signature version
signatureVersion = '56'

# Imported here because it is only needed for the XML generation below.
from xml.sax.saxutils import escape

sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
<versions>
<fido_version>{0}</fido_version>
<signature_version>{1}</signature_version>
</versions>""".format(fidoVersion,signatureVersion))

# Each CSV record read from stdin becomes one <file> element on stdout.
reader = csv.reader(sys.stdin)

for row in reader:
    # csv.reader yields an empty list for a blank input line; there is
    # nothing to report for it.
    if not row:
        continue
    # Escape &, < and > so that a value containing markup characters
    # (a file name, for instance) cannot break the generated XML.
    row = [escape(field) for field in row]
    # CSV column -> element: 6 filename, 0 status, 8 matchtype, 1 time,
    # 2 puid, 7 mimetype, 3 formatname, 4 signaturename, 5 filesize.
    sys.stdout.write("""
<file>
<filename>{0}</filename>
<status>{1}</status>
<matchtype>{2}</matchtype>
<time>{3}</time>
<puid>{4}</puid>
<mimetype>{5}</mimetype>
<formatname>{6}</formatname>
<signaturename>{7}</signaturename>
<filesize>{8}</filesize>
</file>""".format(row[6],row[0],row[8],row[1],row[2],row[7],row[3],row[4],row[5]))

sys.stdout.write("\n</fido_output>\n")
|
@@ -0,0 +1,171 @@
|
|
1
|
+
#!python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
#
|
4
|
+
# FIDO SIGNATURE UPDATER
|
5
|
+
#
|
6
|
+
# Open Planets Foundation (http://www.openplanetsfoundation.org)
|
7
|
+
# See License.txt for license information.
|
8
|
+
# Download from: https://github.com/openplanets/fido/releases
|
9
|
+
# Author: Maurice de Rooij (NANETH), 2012
|
10
|
+
#
|
11
|
+
# FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
12
|
+
# PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
|
13
|
+
#
|
14
|
+
import sys, os, urllib, time, zipfile, shutil
|
15
|
+
|
16
|
+
from xml.etree import ElementTree as CET
|
17
|
+
from xml.etree import ElementTree as VET
|
18
|
+
from pronomutils import getPronomSignature, checkWellFormedness
|
19
|
+
import prepare
|
20
|
+
|
21
|
+
# Settings for the signature updater; main() takes this mapping as its
# only argument.
defaults = dict(
    version='1.2.2',
    # Directory holding the signature files, located next to this script.
    conf_dir=os.path.join(os.path.dirname(__file__), 'conf'),
    # Name of the download folder created inside conf_dir.
    tmp_dir='tmp',
    # The {0} placeholder in both names receives the PRONOM version number.
    signatureFileName='DROID_SignatureFile-v{0}.xml',
    pronomZipFileName='pronom-xml-v{0}.zip',
    fidoSignatureVersion='format_extensions.xml',
    versionsFileName='versions.xml',
    # in secs, to prevent DoS of PRONOM server
    http_throttle=0.5,
    # container version is frozen and needs human attention before updating
    # NOTE(review): the packaged conf directory appears to ship
    # container-signature-20150307.xml -- confirm this name is still right.
    containerVersion='container-signature-20130501.xml',
    # Template for versions.xml: {0} PRONOM version, {1} PRONOM signature
    # file, {2} container signature, {3} FIDO extensions, {4} script version.
    versionXML=(
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<versions>\n'
        '\t<pronomVersion>{0}</pronomVersion>\n'
        '\t<pronomSignature>{1}</pronomSignature>\n'
        '\t<pronomContainerSignature>{2}</pronomContainerSignature>\n'
        '\t<fidoExtensionSignature>{3}</fidoExtensionSignature>\n'
        '\t<updateScript>{4}</updateScript>\n'
        '</versions>'
    ),
)
|
33
|
+
|
34
|
+
def main(defaults):
    """
    Updates PRONOM signatures
    Interactive script, requires keyboard input

    arguments:
    "defaults": dict of settings; the keys read here are 'version',
    'conf_dir', 'tmp_dir', 'signatureFileName', 'pronomZipFileName',
    'fidoSignatureVersion', 'versionsFileName', 'http_throttle',
    'containerVersion' and 'versionXML'

    Ends through sys.exit() (raising SystemExit) after a successful update,
    after the user declines a prompt, after a failed download and on Ctrl-C.
    Other exceptions are not handled here and propagate to the caller.
    """
    # Every print below takes a single parenthesised string so the line parses
    # both as a Python 2 print statement and as a Python 3 function call.
    try:
        answers = ['y','yes']
        confDir = os.path.abspath(defaults['conf_dir'])
        # Fill in the static fields now; {0} and {1} are kept as placeholders
        # for the PRONOM version and formats file name, known only later.
        versionXML = defaults['versionXML'].format("{0}","{1}",defaults['containerVersion'],defaults['fidoSignatureVersion'],defaults['version'])
        print("FIDO signature updater v"+defaults['version'])
        print("Contacting PRONOM...")
        currentVersion = getPronomSignature("version")
        # Identity test: getPronomSignature() signals failure with False, and
        # an integer version must never be mistaken for that.
        if currentVersion is False:
            print("Failed to obtain PRONOM signature file version number, please try again")
            sys.exit()
        print("Querying latest signaturefile version...")
        signatureFileName = defaults['signatureFileName'].format(currentVersion)
        signatureFile = os.path.join(confDir, signatureFileName)
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version "+str(currentVersion))
            ask = raw_input("Update anyway? (yes/no): ")
            if ask.lower() not in answers:
                sys.exit()
        print("Downloading signature file version "+str(currentVersion)+"...")
        currentFile = getPronomSignature("file")
        if currentFile is False:
            print("Failed to obtain PRONOM signature file, please try again")
            sys.exit()
        print("Writing {0}...".format(signatureFileName))
        with open(signatureFile,'wb') as sigfile:
            sigfile.write(currentFile)
        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        puids = []
        for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat"):
            puid = node.get("PUID")
            # A FileFormat element without a PUID attribute cannot be fetched.
            if puid:
                puids.append(puid)
        numberPuids = len(puids)
        print("Found "+str(numberPuids)+" PRONOM PUID's")
        print("Downloading signatures can take a while")
        ask = raw_input("Continue and download signatures? (yes/no): ")
        if ask.lower() not in answers:
            print("Aborting update...")
            sys.exit()
        tmpdir = os.path.join(confDir, defaults['tmp_dir'])
        resume_download = False
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download: "+tmpdir)
            ask = raw_input("Resume download (yes) or start over (no)?: ")
            if ask.lower() in answers:
                print("Resuming download...")
                resume_download = True
        else:
            print("Creating temporary folder for download: "+tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # The fallback below deals with a folder that is still missing.
                pass
        # Remember whether a dedicated folder exists: when the download falls
        # back to the conf directory, that directory must not be cleaned up.
        dedicatedTmpDir = os.path.isdir(tmpdir)
        if not dedicatedTmpDir:
            tmpdir = confDir
            print("Failed to create temporary folder for PUID's, using "+tmpdir)
        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        # "fmt/123" is stored as "puid.fmt.123.xml".
        puidFiles = [(puid, "puid."+puid.replace("/", ".")+".xml") for puid in puids]
        for puid, puidFileName in puidFiles:
            filename = os.path.join(tmpdir, puidFileName)
            if resume_download and os.path.isfile(filename) and checkWellFormedness(filename):
                numfiles += 1
                continue
            puidUrl = "http://www.nationalarchives.gov.uk/pronom/"+puid+".xml"
            try:
                filehandle = urllib.urlopen(puidUrl)
            except Exception as e:
                print("Failed to download signaturefile: "+puidUrl)
                print("Error: "+str(e))
                print("Please restart and resume download")
                sys.exit()
            try:
                with open(filename,'wb') as puidfile:
                    shutil.copyfileobj(filehandle, puidfile)
            finally:
                filehandle.close()
            if not checkWellFormedness(filename):
                # Drop the broken download; it will be missing from the zip.
                os.unlink(filename)
                continue
            numfiles += 1
            percent = int(float(numfiles) / one_percent)
            sys.stdout.write("\r"+str(percent)+"%")
            sys.stdout.flush()
            time.sleep(defaults['http_throttle'])
        print("\r100%")
        try:
            import zlib
            compression = zipfile.ZIP_DEFLATED
        except ImportError:
            compression = zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        print("Creating PRONOM zip, adding files with compression mode '"+modes[compression]+"'")
        zf = zipfile.ZipFile(os.path.join(confDir, defaults['pronomZipFileName'].format(currentVersion)), mode='w')
        try:
            for puid, puidFileName in puidFiles:
                # Read from the folder the files were actually downloaded to.
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    os.unlink(filename)
        finally:
            zf.close()
        if dedicatedTmpDir:
            print("Deleting temporary folder and files...")
            # Best effort: a leftover folder only triggers the resume prompt.
            shutil.rmtree(tmpdir, ignore_errors=True)
        # update versions.xml
        versionsFile = os.path.join(confDir, defaults['versionsFileName'])
        print("Updating {0}...".format(defaults['versionsFileName']))
        with open(versionsFile,'wb') as xmlversionsfile:
            xmlversionsfile.write(versionXML.format(str(currentVersion),"formats-v"+str(currentVersion)+".xml"))
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        # there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        prepare.main()
        print("FIDO signatures successfully updated")
        sys.exit()
    except KeyboardInterrupt:
        print("\nAborting update")
        sys.exit()


if __name__ == '__main__':
    main(defaults)
|
Binary file
|
Binary file
|