libis-format 0.9.5-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +18 -0
- data/.travis.yml +41 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/bin/pdf_copy +13 -0
- data/data/ISOcoated_v2_eci.icc +0 -0
- data/data/PDFA_def.ps +40 -0
- data/data/ead.xsd +2728 -0
- data/data/eciRGB_v2.icc +0 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +217 -0
- data/lib/libis/format/config.rb +35 -0
- data/lib/libis/format/converter/base.rb +101 -0
- data/lib/libis/format/converter/chain.rb +167 -0
- data/lib/libis/format/converter/image_converter.rb +214 -0
- data/lib/libis/format/converter/office_converter.rb +50 -0
- data/lib/libis/format/converter/pdf_converter.rb +139 -0
- data/lib/libis/format/converter/repository.rb +98 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +45 -0
- data/lib/libis/format/fido.rb +102 -0
- data/lib/libis/format/identifier.rb +189 -0
- data/lib/libis/format/office_to_pdf.rb +52 -0
- data/lib/libis/format/pdf_copy.rb +40 -0
- data/lib/libis/format/pdf_merge.rb +41 -0
- data/lib/libis/format/pdf_split.rb +39 -0
- data/lib/libis/format/pdf_to_pdfa.rb +76 -0
- data/lib/libis/format/pdfa_validator.rb +61 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +23 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +34 -0
- data/spec/converter_spec.rb +212 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test-options.jpg +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.jpg +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.pdf.tif +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/data/test_pdfa.pdf +0 -0
- data/spec/identifier_spec.rb +60 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/PdfTool.jar +0 -0
- data/tools/bcpkix-jdk15on-1.49.jar +0 -0
- data/tools/bcprov-jdk15on-1.49.jar +0 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
- data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
- metadata +396 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# PRONOM UTILS
|
|
4
|
+
#
|
|
5
|
+
# PYTHON FUNCTION TO QUERY PRONOM VERSION
|
|
6
|
+
# AND DOWNLOAD SIGNATUREFILE
|
|
7
|
+
# USES PRONOM SOAP SERVICE
|
|
8
|
+
#
|
|
9
|
+
# Open Planets Foundation (http://www.openplanetsfoundation.org)
|
|
10
|
+
# See License.txt for license information.
|
|
11
|
+
# Download from: http://github.com/openplanets/fido/downloads
|
|
12
|
+
# Author: Maurice de Rooij (OPF/NANETH), 2012
|
|
13
|
+
#
|
|
14
|
+
# PRONOM UTILS is a library used by FIDO
|
|
15
|
+
# FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
|
16
|
+
# PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
|
|
17
|
+
#
|
|
18
|
+
import sys
|
|
19
|
+
from xml.dom import minidom
|
|
20
|
+
__pronomutils__ = {'version' : '1.0.1'}
|
|
21
|
+
|
|
22
|
+
def checkWellFormedness(filename,error=False):
    """
    Check whether a file contains well-formed XML.

    usage: checkWellFormedness(filename)
    arguments:
    "filename": path of the file to parse
    "error": when not False, the failure reason is written to stderr
    returns: True if the file parses as well-formed XML, False otherwise
    (this includes files that cannot be opened or read)

    Only well-formedness is checked; nothing is validated against a schema.
    """
    import xml.parsers.expat
    parser = xml.parsers.expat.ParserCreate()
    try:
        # Binary mode hands the parser the raw bytes of the file, and the
        # "with" block closes the handle deterministically so callers can
        # unlink the file straight after this call.
        with open(filename, "rb") as xmlfile:
            parser.ParseFile(xmlfile)
    except Exception as e:
        # Best-effort check: any I/O or parse problem means "not well formed".
        if error is not False:
            sys.stderr.write("checkWellFormedness: %s: %s;\n" % (filename, e))
        return False
    return True
|
|
38
|
+
|
|
39
|
+
def getPronomSignature(type):
    """
    Query the PRONOM SOAP service for the latest signature file.

    usage: getPronomSignature(version|file)
    arguments:
    "version": returns latest signature file version number as int
    "file": returns latest signature XML file as string
    upon error: writes to stderr and returns False (this now also covers a
    failure to contact PRONOM, which used to terminate the interpreter)
    """
    # NOTE: "type" shadows the builtin of the same name; the name is kept
    # because it is part of the public signature.
    try:
        soapVersionContainer = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getSignatureFileVersionV1 xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>"""
        soapFileContainer = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getSignatureFileV1 xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>"""
        soapVersionHeader = """\"http://pronom.nationalarchives.gov.uk:getSignatureFileVersionV1In\""""
        soapFileHeader = """\"http://pronom.nationalarchives.gov.uk:getSignatureFileV1In\""""
        # Validate the argument before importing anything or touching the
        # network, so a bad call fails fast.
        if type == "version":
            soapAction = soapVersionHeader
            soapStr = soapVersionContainer
        elif type == "file":
            soapAction = soapFileHeader
            soapStr = soapFileContainer
        else:
            sys.stderr.write("getPronomSignature(): unknown type: "+type)
            return False
        import httplib
        import re
        import os
        webservice = httplib.HTTP("apps.nationalarchives.gov.uk")
        webservice.putrequest("POST", "/pronom/service.asmx")
        webservice.putheader("Host", "www.nationalarchives.gov.uk")
        webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__pronomutils__['version']))
        webservice.putheader("Content-type", "text/xml; charset=\"UTF-8\"")
        webservice.putheader("Content-length", "%d" % len(soapStr))
        webservice.putheader("SOAPAction", soapAction)
        try:
            webservice.endheaders()
        except Exception as e:
            # Honour the documented contract (stderr + False) instead of
            # calling sys.exit() from inside a library function.
            sys.stderr.write("getPronomSignature(): failed to contact PRONOM;\n%s\n" % (e))
            return False
        webservice.send(soapStr)
        statuscode, statusmessage, header = webservice.getreply()
        if statuscode != 200:
            sys.stderr.write("getPronomSignature(): webservice error: '"+str(statuscode)+" "+statusmessage+"'\n")
            return False
        response = webservice.getfile().read()
        if type == "version":
            sigxml = re.search(r"<Version>([0-9]{1,4})</Version>", response)
            # search() returns None when the element is missing; report that
            # as a parse failure rather than letting .group() raise.
            if sigxml is None:
                sys.stderr.write("getPronomSignature(): could not parse VERSION from SOAP response: "+type)
                return False
            return int(sigxml.group(1))
        # type == "file"
        # DOTALL lets the match span line breaks inside the element.
        sigxml = re.search(r"<SignatureFile>.*</SignatureFile>", response, re.DOTALL)
        if sigxml is None:
            sys.stderr.write("getPronomSignature(): could not parse XML from SOAP response: "+type)
            return False
        sigtxt = """<?xml version="1.0" encoding="UTF-8"?>"""+"\n"+sigxml.group(0)
        # The extracted document is written to a scratch file so the shared
        # well-formedness check can be applied; the scratch file is always
        # removed again, even if writing or checking raises.
        tmpfile = "./tmp_getPronomSignature.xml"
        try:
            tmp = open(tmpfile,'wb')
            try:
                tmp.write(sigtxt)
            finally:
                tmp.close()
            wellFormed = checkWellFormedness(tmpfile)
        finally:
            if os.path.exists(tmpfile):
                os.unlink(tmpfile)
        if not wellFormed:
            sys.stderr.write("getPronomSignature(): signaturefile not well formed")
            return False
        return sigtxt
    except Exception as e:
        # Written to stderr only; the original also printed the return value
        # of write() ("None") to stdout.
        sys.stderr.write("getPronomSignature(): unknown error: "+str(e))
        return False
|
data/tools/fido/toxml.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#!python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
#
|
|
4
|
+
# FIDO csv output to XML
|
|
5
|
+
# Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, september 2011
|
|
6
|
+
#
|
|
7
|
+
# Usage in combination with FIDO:
|
|
8
|
+
# Windows: python fido.py [ARGS] | python toxml.py > output.xml
|
|
9
|
+
# Linux: fido.py [ARGS] | toxml.py > output.xml
|
|
10
|
+
#
|
|
11
|
+
# Usage afterwards:
|
|
12
|
+
# Windows: type output.csv | toxml.py > output.xml
|
|
13
|
+
# Linux: cat output.csv | toxml.py > output.xml
|
|
14
|
+
#
|
|
15
|
+
# for difference in usage, see:
|
|
16
|
+
# http://bugs.python.org/issue9390
|
|
17
|
+
# http://support.microsoft.com/default.aspx?kbid=321788
|
|
18
|
+
#
|
|
19
|
+
|
|
20
|
+
import sys
|
|
21
|
+
import csv
|
|
22
|
+
import string
|
|
23
|
+
|
|
24
|
+
# define FIDO version
fidoVersion = '1.0'
# define PRONOM signature version
signatureVersion = '56'

# CSV column index feeding each output element, in output order:
# filename, status, matchtype, time, puid, mimetype, formatname,
# signaturename, filesize
_COLUMNS = (6, 0, 8, 1, 2, 7, 3, 4, 5)


def main(instream=None, outstream=None):
    """
    Convert FIDO csv output to XML.

    arguments:
    "instream": file-like object providing the csv lines (default: sys.stdin)
    "outstream": file-like object receiving the XML (default: sys.stdout)

    Every csv value is XML-escaped before it is written, so file or format
    names containing "&", "<" or ">" no longer produce malformed XML.
    Blank lines are skipped and rows with fewer columns than expected are
    padded with empty strings instead of raising IndexError.
    """
    from xml.sax.saxutils import escape
    if instream is None:
        instream = sys.stdin
    if outstream is None:
        outstream = sys.stdout

    outstream.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
<versions>
<fido_version>{0}</fido_version>
<signature_version>{1}</signature_version>
</versions>""".format(fidoVersion,signatureVersion))

    for row in csv.reader(instream):
        if not row:
            # csv.reader yields an empty list for a blank input line
            continue
        # pad short rows so every column index in _COLUMNS is valid
        row = list(row) + [''] * (max(_COLUMNS) + 1 - len(row))
        outstream.write("""
<file>
<filename>{0}</filename>
<status>{1}</status>
<matchtype>{2}</matchtype>
<time>{3}</time>
<puid>{4}</puid>
<mimetype>{5}</mimetype>
<formatname>{6}</formatname>
<signaturename>{7}</signaturename>
<filesize>{8}</filesize>
</file>""".format(*[escape(row[index]) for index in _COLUMNS]))

    outstream.write("\n</fido_output>\n")


# Run the conversion only when executed as a script (the documented usage is
# a shell pipe), so importing the module does not block reading stdin.
if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
#!python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
#
|
|
4
|
+
# FIDO SIGNATURE UPDATER
|
|
5
|
+
#
|
|
6
|
+
# Open Planets Foundation (http://www.openplanetsfoundation.org)
|
|
7
|
+
# See License.txt for license information.
|
|
8
|
+
# Download from: https://github.com/openplanets/fido/releases
|
|
9
|
+
# Author: Maurice de Rooij (NANETH), 2012
|
|
10
|
+
#
|
|
11
|
+
# FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
|
|
12
|
+
# PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
|
|
13
|
+
#
|
|
14
|
+
import sys, os, urllib, time, zipfile, shutil
|
|
15
|
+
|
|
16
|
+
from xml.etree import ElementTree as CET
|
|
17
|
+
from xml.etree import ElementTree as VET
|
|
18
|
+
from pronomutils import getPronomSignature, checkWellFormedness
|
|
19
|
+
import prepare
|
|
20
|
+
|
|
21
|
+
# Settings for the signature updater; handed to main() by the script entry
# point at the bottom of this file.
defaults = {
    'version': '1.2.2',  # updater version, printed at start-up and written to versions.xml
    'conf_dir': os.path.join(os.path.dirname(__file__), 'conf'),  # where signature files, the zip and versions.xml are written
    'tmp_dir': 'tmp',  # download folder, created below conf_dir
    'signatureFileName' : 'DROID_SignatureFile-v{0}.xml',  # {0} = PRONOM signature version
    'pronomZipFileName' : 'pronom-xml-v{0}.zip',  # {0} = PRONOM signature version
    'fidoSignatureVersion' : 'format_extensions.xml',
    'versionsFileName' : 'versions.xml',
    'http_throttle' : 0.5, # in secs, to prevent DoS of PRONOM server
    # container version is frozen and needs human attention before updating.
    # It is copied verbatim into versions.xml, so it has to name a container
    # signature file that is present in conf_dir; this distribution ships
    # container-signature-20150307.xml (the previous value, 20130501, is not
    # part of the package).
    'containerVersion' : 'container-signature-20150307.xml',
    # {0} PRONOM version, {1} FIDO formats file, {2} container signature,
    # {3} FIDO extension signature, {4} updater version
    'versionXML' : """<?xml version="1.0" encoding="UTF-8"?>\n<versions>\n\t<pronomVersion>{0}</pronomVersion>\n\t<pronomSignature>{1}</pronomSignature>\n\t<pronomContainerSignature>{2}</pronomContainerSignature>\n\t<fidoExtensionSignature>{3}</fidoExtensionSignature>\n\t<updateScript>{4}</updateScript>\n</versions>"""
    }
|
|
33
|
+
|
|
34
|
+
def main(defaults):
    """
    Updates PRONOM signatures
    Interactive script, requires keyboard input

    arguments:
    "defaults": dict of settings; the keys read here are 'version',
    'conf_dir', 'tmp_dir', 'signatureFileName', 'pronomZipFileName',
    'fidoSignatureVersion', 'versionsFileName', 'http_throttle',
    'containerVersion' and 'versionXML'

    Steps: query the current PRONOM signature version, download the
    signature file, download one XML file per PUID into a temporary folder,
    pack those into the PRONOM zip, rewrite versions.xml and finally run
    prepare.main().

    The function does not return normally: it ends with sys.exit(), using a
    non-zero status when PRONOM could not be queried or a download failed.
    """
    try:
        resume_download = False
        answers = ['y','yes']
        confDir = os.path.abspath(defaults['conf_dir'])
        # Fill in the static fields now; {0} and {1} are re-inserted as
        # placeholders and completed once the PRONOM version is known.
        versionXML = defaults['versionXML'].format("{0}","{1}",defaults['containerVersion'],defaults['fidoSignatureVersion'],defaults['version'])
        print("FIDO signature updater v"+defaults['version'])
        print("Contacting PRONOM...")
        currentVersion = getPronomSignature("version")
        # Identity test: only the False sentinel means failure.
        if currentVersion is False:
            print("Failed to obtain PRONOM signature file version number, please try again")
            sys.exit(1)
        print("Querying latest signaturefile version...")
        signatureFileName = defaults['signatureFileName'].format(currentVersion)
        signatureFile = os.path.join(confDir, signatureFileName)
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version "+str(currentVersion))
            ask = raw_input("Update anyway? (yes/no): ")
            if ask.strip().lower() not in answers:
                sys.exit()
        print("Downloading signature file version "+str(currentVersion)+"...")
        currentFile = getPronomSignature("file")
        if currentFile is False:
            print("Failed to obtain PRONOM signature file, please try again")
            sys.exit(1)
        print("Writing {0}...".format(signatureFileName))
        with open(signatureFile,'wb') as sigfile:
            sigfile.write(currentFile)
        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        puids = []
        for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat"):
            puid = node.get("PUID")
            # get() returns None when the attribute is absent; such entries
            # cannot be downloaded and are left out.
            if puid:
                puids.append(puid)
        numberPuids = len(puids)
        print("Found "+str(numberPuids)+" PRONOM PUID's")
        print("Downloading signatures can take a while")
        ask = raw_input("Continue and download signatures? (yes/no): ")
        if ask.strip().lower() not in answers:
            print("Aborting update...")
            sys.exit()
        tmpdir = os.path.join(confDir, defaults['tmp_dir'])
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download: "+tmpdir)
            ask = raw_input("Resume download (yes) or start over (no)?: ")
            if ask.strip().lower() in answers:
                print("Resuming download...")
                resume_download = True
        else:
            print("Creating temporary folder for download: "+tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # handled just below by falling back to conf_dir
                pass
        # Remember whether a dedicated temporary folder is in use: only that
        # folder may be deleted during clean-up.
        dedicatedTmpdir = os.path.isdir(tmpdir)
        if not dedicatedTmpdir:
            tmpdir = confDir
            print("Failed to create temporary folder for PUID's, using "+tmpdir)
        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        skipped = []
        for puid in puids:
            filename = os.path.join(tmpdir, _puidFileName(puid))
            if resume_download and os.path.isfile(filename) and checkWellFormedness(filename):
                numfiles += 1
                continue
            puidUrl = "http://www.nationalarchives.gov.uk/pronom/"+puid+".xml"
            try:
                filehandle = urllib.urlopen(puidUrl)
            except Exception as e:
                print("Failed to download signaturefile: "+puidUrl)
                print("Error: "+str(e))
                print("Please restart and resume download")
                sys.exit(1)
            try:
                with open(filename,'wb') as puidfile:
                    shutil.copyfileobj(filehandle, puidfile)
            finally:
                filehandle.close()
            # Throttle after every request, including ones whose result is
            # rejected below.
            time.sleep(defaults['http_throttle'])
            if not checkWellFormedness(filename):
                os.unlink(filename)
                skipped.append(puid)
                continue
            numfiles += 1
            percent = int(float(numfiles) / one_percent)
            sys.stdout.write("\r"+str(percent)+"%")
            sys.stdout.flush()
        print("\r100%")
        if skipped:
            print("Warning: "+str(len(skipped))+" signature file(s) were not well formed and have been skipped")
        try:
            import zlib
            compression = zipfile.ZIP_DEFLATED
        except ImportError:
            compression = zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        print("Creating PRONOM zip, adding files with compression mode '"+modes[compression]+"'")
        zf = zipfile.ZipFile(os.path.join(confDir, defaults['pronomZipFileName'].format(currentVersion)), mode='w')
        try:
            for puid in puids:
                puidFileName = _puidFileName(puid)
                # Read from the folder the files were downloaded to, which is
                # conf_dir itself when the temporary folder was unavailable.
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    os.unlink(filename)
        finally:
            zf.close()
        if dedicatedTmpdir:
            print("Deleting temporary folder and files...")
            # Best effort, as before. Never run this on the conf_dir
            # fallback: that would wipe the whole configuration folder.
            shutil.rmtree(tmpdir, ignore_errors=True)
        # update versions.xml
        versionsFile = os.path.join(confDir, defaults['versionsFileName'])
        print("Updating {0}...".format(defaults['versionsFileName']))
        with open(versionsFile,'wb') as xmlversionsfile:
            xmlversionsfile.write(versionXML.format(str(currentVersion),"formats-v"+str(currentVersion)+".xml"))
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        # there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        prepare.main()
        print("FIDO signatures successfully updated")
        sys.exit()
    except KeyboardInterrupt:
        print("\nAborting update")
        sys.exit()


def _puidFileName(puid):
    """Return the local file name for a PUID, e.g. "fmt/10" -> "puid.fmt.10.xml"."""
    puidType, puidNum = puid.split("/")
    return "puid."+puidType+"."+puidNum+".xml"


if __name__ == '__main__':
    main(defaults)
|
|
Binary file
|
|
Binary file
|