libis-format 0.9.5-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (207) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +18 -0
  4. data/.travis.yml +41 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +39 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/bin/pdf_copy +13 -0
  12. data/data/ISOcoated_v2_eci.icc +0 -0
  13. data/data/PDFA_def.ps +40 -0
  14. data/data/ead.xsd +2728 -0
  15. data/data/eciRGB_v2.icc +0 -0
  16. data/data/lias_formats.xml +106 -0
  17. data/data/types.yml +217 -0
  18. data/lib/libis/format/config.rb +35 -0
  19. data/lib/libis/format/converter/base.rb +101 -0
  20. data/lib/libis/format/converter/chain.rb +167 -0
  21. data/lib/libis/format/converter/image_converter.rb +214 -0
  22. data/lib/libis/format/converter/office_converter.rb +50 -0
  23. data/lib/libis/format/converter/pdf_converter.rb +139 -0
  24. data/lib/libis/format/converter/repository.rb +98 -0
  25. data/lib/libis/format/converter.rb +11 -0
  26. data/lib/libis/format/droid.rb +45 -0
  27. data/lib/libis/format/fido.rb +102 -0
  28. data/lib/libis/format/identifier.rb +189 -0
  29. data/lib/libis/format/office_to_pdf.rb +52 -0
  30. data/lib/libis/format/pdf_copy.rb +40 -0
  31. data/lib/libis/format/pdf_merge.rb +41 -0
  32. data/lib/libis/format/pdf_split.rb +39 -0
  33. data/lib/libis/format/pdf_to_pdfa.rb +76 -0
  34. data/lib/libis/format/pdfa_validator.rb +61 -0
  35. data/lib/libis/format/type_database.rb +170 -0
  36. data/lib/libis/format/version.rb +5 -0
  37. data/lib/libis/format.rb +23 -0
  38. data/lib/libis-format.rb +1 -0
  39. data/libis-format.gemspec +34 -0
  40. data/spec/converter_spec.rb +212 -0
  41. data/spec/data/Cevennes2.bmp +0 -0
  42. data/spec/data/Cevennes2.jp2 +0 -0
  43. data/spec/data/Cevennes2.ppm +22492 -0
  44. data/spec/data/test-ead.xml +392 -0
  45. data/spec/data/test-jpg.tif +0 -0
  46. data/spec/data/test-lzw.tif +0 -0
  47. data/spec/data/test-options.jpg +0 -0
  48. data/spec/data/test.bmp +0 -0
  49. data/spec/data/test.doc +0 -0
  50. data/spec/data/test.docx +0 -0
  51. data/spec/data/test.gif +0 -0
  52. data/spec/data/test.jpg +0 -0
  53. data/spec/data/test.ods +0 -0
  54. data/spec/data/test.odt +0 -0
  55. data/spec/data/test.pdf +0 -0
  56. data/spec/data/test.pdf.tif +0 -0
  57. data/spec/data/test.png +0 -0
  58. data/spec/data/test.ps +8631 -0
  59. data/spec/data/test.psd +0 -0
  60. data/spec/data/test.rtf +1455 -0
  61. data/spec/data/test.tif +0 -0
  62. data/spec/data/test.txt +12 -0
  63. data/spec/data/test.xcf +0 -0
  64. data/spec/data/test.xls +0 -0
  65. data/spec/data/test.xlsx +0 -0
  66. data/spec/data/test.xml +4 -0
  67. data/spec/data/test_pdfa.pdf +0 -0
  68. data/spec/identifier_spec.rb +60 -0
  69. data/spec/spec_helper.rb +9 -0
  70. data/spec/test_types.yml +12 -0
  71. data/spec/type_database_spec.rb +140 -0
  72. data/tools/PdfTool.jar +0 -0
  73. data/tools/bcpkix-jdk15on-1.49.jar +0 -0
  74. data/tools/bcprov-jdk15on-1.49.jar +0 -0
  75. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  76. data/tools/droid/container-signature-20150307.xml +2235 -0
  77. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  78. data/tools/droid/droid.bat +154 -0
  79. data/tools/droid/droid.sh +138 -0
  80. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  81. data/tools/droid/lib/activation-1.1.jar +0 -0
  82. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  83. data/tools/droid/lib/antlr-3.2.jar +0 -0
  84. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  85. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  86. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  87. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  88. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  89. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  90. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  91. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  92. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  93. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  94. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  95. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  96. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  97. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  98. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  99. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  100. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  101. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  102. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  103. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  104. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  105. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  106. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  107. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  108. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  109. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  110. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  111. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  112. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  113. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  114. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  115. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  116. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  117. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  118. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  119. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  120. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  121. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  122. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  123. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  124. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  125. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  126. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  127. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  128. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  129. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  130. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  131. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  132. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  133. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  134. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  135. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  136. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  137. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  138. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  139. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  140. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  141. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  142. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  143. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  144. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  145. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  146. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  147. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  148. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  149. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  150. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  151. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  152. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  153. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  154. data/tools/droid/lib/jta-1.1.jar +0 -0
  155. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  156. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  157. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  158. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  159. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  160. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  161. data/tools/droid/lib/poi-3.7.jar +0 -0
  162. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  163. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  164. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  165. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  166. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  167. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  168. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  169. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  170. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  171. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  172. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  173. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  174. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  175. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  176. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  177. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  178. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  179. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  180. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  181. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  182. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  183. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  184. data/tools/droid/lib/xz-1.0.jar +0 -0
  185. data/tools/fido/__init__.py +0 -0
  186. data/tools/fido/argparselocal.py +2355 -0
  187. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  188. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  189. data/tools/fido/conf/dc.xsd +119 -0
  190. data/tools/fido/conf/dcmitype.xsd +53 -0
  191. data/tools/fido/conf/dcterms.xsd +383 -0
  192. data/tools/fido/conf/fido-formats.xsd +173 -0
  193. data/tools/fido/conf/format_extension_template.xml +105 -0
  194. data/tools/fido/conf/format_extensions.xml +498 -0
  195. data/tools/fido/conf/formats-v81.xml +38355 -0
  196. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  197. data/tools/fido/conf/versions.xml +8 -0
  198. data/tools/fido/fido.bat +4 -0
  199. data/tools/fido/fido.py +854 -0
  200. data/tools/fido/fido.sh +5 -0
  201. data/tools/fido/prepare.py +616 -0
  202. data/tools/fido/pronomutils.py +115 -0
  203. data/tools/fido/toxml.py +52 -0
  204. data/tools/fido/update_signatures.py +171 -0
  205. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  206. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  207. metadata +396 -0
@@ -0,0 +1,115 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # PRONOM UTILS
4
+ #
5
+ # PYTHON FUNCTION TO QUERY PRONOM VERSION
6
+ # AND DOWNLOAD SIGNATUREFILE
7
+ # USES PRONOM SOAP SERVICE
8
+ #
9
+ # Open Planets Foundation (http://www.openplanetsfoundation.org)
10
+ # See License.txt for license information.
11
+ # Download from: http://github.com/openplanets/fido/downloads
12
+ # Author: Maurice de Rooij (OPF/NANETH), 2012
13
+ #
14
+ # PRONOM UTILS is a library used by FIDO
15
+ # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
16
+ # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
17
+ #
18
+ import sys
19
+ from xml.dom import minidom
20
+ __pronomutils__ = {'version' : '1.0.1'}
21
+
22
def checkWellFormedness(filename,error=False):
    """
    Check whether a file contains well-formed XML.

    usage: checkWellFormedness(filename)
    arguments:
    "filename": path of the file to check
    "error": when not False, the parse (or open) error is written to stderr
    returns: True if the file parses as XML, False otherwise
    (a file that cannot be opened also yields False)
    """
    import xml.parsers.expat
    parser = xml.parsers.expat.ParserCreate()
    try:
        # Binary mode hands expat the raw bytes so it can honour the encoding
        # declared in the XML prolog; the context manager guarantees the
        # handle is closed even when parsing fails.
        with open(filename, "rb") as handle:
            parser.ParseFile(handle)
    except Exception as e:
        if error is not False:
            sys.stderr.write("checkWellFormedness: %s: %s;\n" % (filename, e))
        return False
    return True
38
+
39
def getPronomSignature(type):
    """
    Query the PRONOM SOAP service for the latest DROID signature file.

    usage: getPronomSignature(version|file)
    arguments:
    "version": returns latest signature file version number as int
    "file": returns latest signature XML file as string
    upon error: writes to stderr and returns False
    """
    # NOTE: the parameter name shadows the builtin "type"; it is kept so that
    # existing callers (including keyword callers) continue to work.
    try:
        # SOAP operation name for each supported request type
        soapOperations = {
            "version": "getSignatureFileVersionV1",
            "file": "getSignatureFileV1",
        }
        if type not in soapOperations:
            sys.stderr.write("getPronomSignature(): unknown type: {0}\n".format(type))
            return False
        import httplib
        import re
        import os
        import tempfile
        operation = soapOperations[type]
        # Both requests share the same envelope; only the operation differs.
        soapStr = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>""".format(operation)
        # The SOAPAction header value includes the surrounding double quotes.
        soapAction = "\"http://pronom.nationalarchives.gov.uk:{0}In\"".format(operation)
        webservice = httplib.HTTP("apps.nationalarchives.gov.uk")
        try:
            webservice.putrequest("POST", "/pronom/service.asmx")
            webservice.putheader("Host", "www.nationalarchives.gov.uk")
            webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__pronomutils__['version']))
            webservice.putheader("Content-type", "text/xml; charset=\"UTF-8\"")
            webservice.putheader("Content-length", "%d" % len(soapStr))
            webservice.putheader("SOAPAction", soapAction)
            try:
                # endheaders() is where the connection is actually opened
                webservice.endheaders()
            except Exception as e:
                sys.stderr.write("getPronomSignature(): failed to contact PRONOM;\n%s\n" % (e,))
                # honour the documented contract instead of exiting the process
                return False
            webservice.send(soapStr)
            statuscode, statusmessage, header = webservice.getreply()
            if statuscode != 200:
                sys.stderr.write("getPronomSignature(): webservice error: '{0} {1}'\n".format(statuscode, statusmessage))
                return False
            response = webservice.getfile().read()
        finally:
            webservice.close()
        if type == "version":
            sigxml = re.search(r"<Version>([0-9]{1,4})</Version>", response)
            if sigxml is None:
                sys.stderr.write("getPronomSignature(): could not parse VERSION from SOAP response: "+type+"\n")
                return False
            return int(sigxml.group(1))
        # type == "file"
        sigxml = re.search(r"<SignatureFile>.*</SignatureFile>", response)
        if sigxml is None:
            sys.stderr.write("getPronomSignature(): could not parse XML from SOAP response: "+type+"\n")
            return False
        signature = """<?xml version="1.0" encoding="UTF-8"?>"""+"\n"+sigxml.group(0)
        # Validate through a uniquely named scratch file rather than a fixed
        # name in the current directory, which may be read-only or shared
        # with a concurrent run.
        handle, tmpfile = tempfile.mkstemp(prefix="tmp_getPronomSignature", suffix=".xml")
        try:
            with os.fdopen(handle, 'wb') as tmp:
                tmp.write(signature)
            wellFormed = checkWellFormedness(tmpfile)
        finally:
            os.unlink(tmpfile)
        if not wellFormed:
            sys.stderr.write("getPronomSignature(): signaturefile not well formed\n")
            return False
        return signature
    except Exception as e:
        sys.stderr.write("getPronomSignature(): unknown error: "+str(e)+"\n")
        return False
@@ -0,0 +1,52 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # FIDO csv output to XML
5
+ # Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, september 2011
6
+ #
7
+ # Usage in combination with FIDO:
8
+ # Windows: python fido.py [ARGS] | python toxml.py > output.xml
9
+ # Linux: fido.py [ARGS] | toxml.py > output.xml
10
+ #
11
+ # Usage afterwards:
12
+ # Windows: type output.csv | toxml.py > output.xml
13
+ # Linux: cat output.csv | toxml.py > output.xml
14
+ #
15
+ # for difference in usage, see:
16
+ # http://bugs.python.org/issue9390
17
+ # http://support.microsoft.com/default.aspx?kbid=321788
18
+ #
19
+
20
import sys
import csv
import string
from xml.sax.saxutils import escape

# define FIDO version
fidoVersion = '1.0'
# define PRONOM signature version
signatureVersion = '56'
# number of CSV columns consumed for each <file> element
fieldCount = 9

# Document prologue: XML declaration, root element and version block.
sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
<versions>
<fido_version>{0}</fido_version>
<signature_version>{1}</signature_version>
</versions>""".format(fidoVersion,signatureVersion))

reader = csv.reader(sys.stdin)

for row in reader:
    if not row:
        # csv.reader yields an empty list for a blank input line
        continue
    # Escape &, < and > so that e.g. a filename containing "&" cannot break
    # the XML, and pad short rows so that a truncated line cannot abort the
    # run before the closing root tag is written.
    fields = [escape(value) for value in row]
    fields += [''] * (fieldCount - len(fields))
    sys.stdout.write("""
<file>
<filename>{0}</filename>
<status>{1}</status>
<matchtype>{2}</matchtype>
<time>{3}</time>
<puid>{4}</puid>
<mimetype>{5}</mimetype>
<formatname>{6}</formatname>
<signaturename>{7}</signaturename>
<filesize>{8}</filesize>
</file>""".format(fields[6],fields[0],fields[8],fields[1],fields[2],fields[7],fields[3],fields[4],fields[5]))

sys.stdout.write("\n</fido_output>\n")
@@ -0,0 +1,171 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # FIDO SIGNATURE UPDATER
5
+ #
6
+ # Open Planets Foundation (http://www.openplanetsfoundation.org)
7
+ # See License.txt for license information.
8
+ # Download from: https://github.com/openplanets/fido/releases
9
+ # Author: Maurice de Rooij (NANETH), 2012
10
+ #
11
+ # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
12
+ # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
13
+ #
14
+ import sys, os, urllib, time, zipfile, shutil
15
+
16
+ from xml.etree import ElementTree as CET
17
+ from xml.etree import ElementTree as VET
18
+ from pronomutils import getPronomSignature, checkWellFormedness
19
+ import prepare
20
+
21
# Settings for the signature updater; passed to main().
defaults = {
    # version of this update script, recorded in versions.xml
    'version': '1.2.2',
    # folder holding the signature, container and versions files
    'conf_dir': os.path.join(os.path.dirname(__file__), 'conf'),
    # download folder for individual PUID files, created below conf_dir
    'tmp_dir': 'tmp',
    # file name templates, {0} is the PRONOM signature version
    'signatureFileName' : 'DROID_SignatureFile-v{0}.xml',
    'pronomZipFileName' : 'pronom-xml-v{0}.zip',
    'fidoSignatureVersion' : 'format_extensions.xml',
    'versionsFileName' : 'versions.xml',
    'http_throttle' : 0.5, # in secs, to prevent DoS of PRONOM server
    # container version is frozen and needs human attention before updating;
    # it must name the container signature file that is shipped in conf_dir,
    # because this value is written to versions.xml on every update
    'containerVersion' : 'container-signature-20150307.xml',
    # template for versions.xml: {0} PRONOM version, {1} formats file,
    # {2} container signature, {3} FIDO extension file, {4} script version
    'versionXML' : """<?xml version="1.0" encoding="UTF-8"?>\n<versions>\n\t<pronomVersion>{0}</pronomVersion>\n\t<pronomSignature>{1}</pronomSignature>\n\t<pronomContainerSignature>{2}</pronomContainerSignature>\n\t<fidoExtensionSignature>{3}</fidoExtensionSignature>\n\t<updateScript>{4}</updateScript>\n</versions>"""
    }
33
+
34
def _puidFileName(puid):
    """Return the local file name used for a PUID such as 'fmt/12'."""
    puidType, puidNum = puid.split("/")
    return "puid."+puidType+"."+puidNum+".xml"


def main(defaults):
    """
    Updates PRONOM signatures
    Interactive script, requires keyboard input

    arguments:
    "defaults": settings dictionary (see the module level "defaults")

    Steps: query the latest signature version, download the signature file,
    download one XML file per PUID into a temporary folder, pack those files
    into the PRONOM zip, rewrite versions.xml and run prepare.main().
    Always ends by calling sys.exit().
    """
    try:
        answers = ['y','yes']
        confDir = os.path.abspath(defaults['conf_dir'])
        # pre-fill everything except the version and formats file ({0}, {1})
        versionXML = defaults['versionXML'].format("{0}","{1}",defaults['containerVersion'],defaults['fidoSignatureVersion'],defaults['version'])
        print("FIDO signature updater v"+defaults['version'])
        print("Contacting PRONOM...")
        currentVersion = getPronomSignature("version")
        if currentVersion is False:
            print("Failed to obtain PRONOM signature file version number, please try again")
            sys.exit()
        print("Querying latest signaturefile version...")
        signatureFileName = defaults['signatureFileName'].format(currentVersion)
        signatureFile = os.path.join(confDir, signatureFileName)
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version "+str(currentVersion))
            ask = raw_input("Update anyway? (yes/no): ")
            if ask.lower() not in answers:
                sys.exit()
        print("Downloading signature file version "+str(currentVersion)+"...")
        currentFile = getPronomSignature("file")
        if currentFile is False:
            print("Failed to obtain PRONOM signature file, please try again")
            sys.exit()
        print("Writing {0}...".format(signatureFileName))
        with open(signatureFile,'wb') as sigfile:
            sigfile.write(currentFile)
        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        # skip FileFormat nodes without a PUID attribute, they cannot be fetched
        puids = [node.get("PUID")
                 for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat")
                 if node.get("PUID")]
        numberPuids = len(puids)
        print("Found "+str(numberPuids)+" PRONOM PUID's")
        print("Downloading signatures can take a while")
        ask = raw_input("Continue and download signatures? (yes/no): ")
        if ask.lower() not in answers:
            print("Aborting update...")
            sys.exit()
        resume_download = False
        tmpdir = os.path.join(confDir, defaults['tmp_dir'])
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download: "+tmpdir)
            ask = raw_input("Resume download (yes) or start over (no)?: ")
            if ask.lower() in answers:
                print("Resuming download...")
                resume_download = True
        else:
            print("Creating temporary folder for download: "+tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # handled below by falling back to the conf folder
                pass
        if not os.path.isdir(tmpdir):
            tmpdir = confDir
            print("Failed to create temporary folder for PUID's, using "+tmpdir)
        # The folder may only be removed afterwards when it is a dedicated
        # temporary folder, never when it is the conf folder itself.
        ownsTmpdir = os.path.normpath(tmpdir) != os.path.normpath(confDir)
        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        for puid in puids:
            filename = os.path.join(tmpdir, _puidFileName(puid))
            # when resuming, keep files that were already downloaded intact
            if resume_download and os.path.isfile(filename) and checkWellFormedness(filename):
                numfiles += 1
                continue
            puidUrl = "http://www.nationalarchives.gov.uk/pronom/"+puid+".xml"
            try:
                filehandle = urllib.urlopen(puidUrl)
            except Exception as e:
                print("Failed to download signaturefile: "+puidUrl)
                print("Error: "+str(e))
                print("Please restart and resume download")
                sys.exit()
            try:
                with open(filename,'wb') as puidfile:
                    shutil.copyfileobj(filehandle, puidfile)
            finally:
                filehandle.close()
            if not checkWellFormedness(filename):
                # discard broken downloads so they never end up in the zip
                os.unlink(filename)
                continue
            numfiles += 1
            percent = int(float(numfiles) / one_percent)
            # rewrite the progress indicator in place
            sys.stdout.write("\r"+str(percent)+"%")
            sys.stdout.flush()
            time.sleep(defaults['http_throttle'])
        print("\r100%")
        try:
            import zlib
            compression = zipfile.ZIP_DEFLATED
        except ImportError:
            # without zlib the archive can only be stored uncompressed
            compression = zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        print("Creating PRONOM zip, adding files with compression mode '"+modes[compression]+"'")
        zf = zipfile.ZipFile(os.path.join(confDir, defaults['pronomZipFileName'].format(currentVersion)), mode='w')
        try:
            for puid in puids:
                puidFileName = _puidFileName(puid)
                # read from the folder the files were actually downloaded to
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    os.unlink(filename)
        finally:
            zf.close()
        print("Deleting temporary folder and files...")
        if ownsTmpdir:
            # best effort: leftovers do not prevent the update from finishing
            shutil.rmtree(tmpdir, ignore_errors=True)
        # update versions.xml
        versionsFile = os.path.join(confDir, defaults['versionsFileName'])
        print("Updating {0}...".format(defaults['versionsFileName']))
        with open(versionsFile,'wb') as xmlversionsfile:
            xmlversionsfile.write(versionXML.format(str(currentVersion),"formats-v"+str(currentVersion)+".xml"))
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        # there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        prepare.main()
        print("FIDO signatures successfully updated")
        sys.exit()
    except KeyboardInterrupt:
        print("\nAborting update")
        sys.exit()
169
+
170
# Script entry point: run the interactive updater with the module-level
# defaults only when this file is executed directly, not when it is imported.
+ if __name__ == '__main__':
171
+ main(defaults)
Binary file