libis-format 0.9.5-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +18 -0
  4. data/.travis.yml +41 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +39 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/bin/pdf_copy +13 -0
  12. data/data/ISOcoated_v2_eci.icc +0 -0
  13. data/data/PDFA_def.ps +40 -0
  14. data/data/ead.xsd +2728 -0
  15. data/data/eciRGB_v2.icc +0 -0
  16. data/data/lias_formats.xml +106 -0
  17. data/data/types.yml +217 -0
  18. data/lib/libis/format/config.rb +35 -0
  19. data/lib/libis/format/converter/base.rb +101 -0
  20. data/lib/libis/format/converter/chain.rb +167 -0
  21. data/lib/libis/format/converter/image_converter.rb +214 -0
  22. data/lib/libis/format/converter/office_converter.rb +50 -0
  23. data/lib/libis/format/converter/pdf_converter.rb +139 -0
  24. data/lib/libis/format/converter/repository.rb +98 -0
  25. data/lib/libis/format/converter.rb +11 -0
  26. data/lib/libis/format/droid.rb +45 -0
  27. data/lib/libis/format/fido.rb +102 -0
  28. data/lib/libis/format/identifier.rb +189 -0
  29. data/lib/libis/format/office_to_pdf.rb +52 -0
  30. data/lib/libis/format/pdf_copy.rb +40 -0
  31. data/lib/libis/format/pdf_merge.rb +41 -0
  32. data/lib/libis/format/pdf_split.rb +39 -0
  33. data/lib/libis/format/pdf_to_pdfa.rb +76 -0
  34. data/lib/libis/format/pdfa_validator.rb +61 -0
  35. data/lib/libis/format/type_database.rb +170 -0
  36. data/lib/libis/format/version.rb +5 -0
  37. data/lib/libis/format.rb +23 -0
  38. data/lib/libis-format.rb +1 -0
  39. data/libis-format.gemspec +34 -0
  40. data/spec/converter_spec.rb +212 -0
  41. data/spec/data/Cevennes2.bmp +0 -0
  42. data/spec/data/Cevennes2.jp2 +0 -0
  43. data/spec/data/Cevennes2.ppm +22492 -0
  44. data/spec/data/test-ead.xml +392 -0
  45. data/spec/data/test-jpg.tif +0 -0
  46. data/spec/data/test-lzw.tif +0 -0
  47. data/spec/data/test-options.jpg +0 -0
  48. data/spec/data/test.bmp +0 -0
  49. data/spec/data/test.doc +0 -0
  50. data/spec/data/test.docx +0 -0
  51. data/spec/data/test.gif +0 -0
  52. data/spec/data/test.jpg +0 -0
  53. data/spec/data/test.ods +0 -0
  54. data/spec/data/test.odt +0 -0
  55. data/spec/data/test.pdf +0 -0
  56. data/spec/data/test.pdf.tif +0 -0
  57. data/spec/data/test.png +0 -0
  58. data/spec/data/test.ps +8631 -0
  59. data/spec/data/test.psd +0 -0
  60. data/spec/data/test.rtf +1455 -0
  61. data/spec/data/test.tif +0 -0
  62. data/spec/data/test.txt +12 -0
  63. data/spec/data/test.xcf +0 -0
  64. data/spec/data/test.xls +0 -0
  65. data/spec/data/test.xlsx +0 -0
  66. data/spec/data/test.xml +4 -0
  67. data/spec/data/test_pdfa.pdf +0 -0
  68. data/spec/identifier_spec.rb +60 -0
  69. data/spec/spec_helper.rb +9 -0
  70. data/spec/test_types.yml +12 -0
  71. data/spec/type_database_spec.rb +140 -0
  72. data/tools/PdfTool.jar +0 -0
  73. data/tools/bcpkix-jdk15on-1.49.jar +0 -0
  74. data/tools/bcprov-jdk15on-1.49.jar +0 -0
  75. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  76. data/tools/droid/container-signature-20150307.xml +2235 -0
  77. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  78. data/tools/droid/droid.bat +154 -0
  79. data/tools/droid/droid.sh +138 -0
  80. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  81. data/tools/droid/lib/activation-1.1.jar +0 -0
  82. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  83. data/tools/droid/lib/antlr-3.2.jar +0 -0
  84. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  85. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  86. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  87. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  88. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  89. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  90. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  91. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  92. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  93. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  94. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  95. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  96. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  97. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  98. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  99. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  100. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  101. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  102. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  103. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  104. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  105. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  106. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  107. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  108. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  109. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  110. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  111. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  112. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  113. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  114. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  115. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  116. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  117. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  118. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  119. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  120. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  121. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  122. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  123. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  124. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  125. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  126. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  127. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  128. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  129. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  130. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  131. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  132. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  133. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  134. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  135. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  136. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  137. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  138. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  139. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  140. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  141. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  142. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  143. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  144. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  145. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  146. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  147. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  148. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  149. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  150. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  151. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  152. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  153. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  154. data/tools/droid/lib/jta-1.1.jar +0 -0
  155. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  156. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  157. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  158. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  159. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  160. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  161. data/tools/droid/lib/poi-3.7.jar +0 -0
  162. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  163. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  164. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  165. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  166. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  167. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  168. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  169. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  170. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  171. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  172. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  173. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  174. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  175. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  176. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  177. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  178. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  179. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  180. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  181. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  182. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  183. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  184. data/tools/droid/lib/xz-1.0.jar +0 -0
  185. data/tools/fido/__init__.py +0 -0
  186. data/tools/fido/argparselocal.py +2355 -0
  187. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  188. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  189. data/tools/fido/conf/dc.xsd +119 -0
  190. data/tools/fido/conf/dcmitype.xsd +53 -0
  191. data/tools/fido/conf/dcterms.xsd +383 -0
  192. data/tools/fido/conf/fido-formats.xsd +173 -0
  193. data/tools/fido/conf/format_extension_template.xml +105 -0
  194. data/tools/fido/conf/format_extensions.xml +498 -0
  195. data/tools/fido/conf/formats-v81.xml +38355 -0
  196. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  197. data/tools/fido/conf/versions.xml +8 -0
  198. data/tools/fido/fido.bat +4 -0
  199. data/tools/fido/fido.py +854 -0
  200. data/tools/fido/fido.sh +5 -0
  201. data/tools/fido/prepare.py +616 -0
  202. data/tools/fido/pronomutils.py +115 -0
  203. data/tools/fido/toxml.py +52 -0
  204. data/tools/fido/update_signatures.py +171 -0
  205. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  206. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  207. metadata +396 -0
@@ -0,0 +1,115 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # PRONOM UTILS
4
+ #
5
+ # PYTHON FUNCTION TO QUERY PRONOM VERSION
6
+ # AND DOWNLOAD SIGNATUREFILE
7
+ # USES PRONOM SOAP SERVICE
8
+ #
9
+ # Open Planets Foundation (http://www.openplanetsfoundation.org)
10
+ # See License.txt for license information.
11
+ # Download from: http://github.com/openplanets/fido/downloads
12
+ # Author: Maurice de Rooij (OPF/NANETH), 2012
13
+ #
14
+ # PRONOM UTILS is a library used by FIDO
15
+ # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
16
+ # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
17
+ #
18
+ import sys
19
+ from xml.dom import minidom
20
+ __pronomutils__ = {'version' : '1.0.1'}
21
+
22
def checkWellFormedness(filename,error=False):
    """
    Report whether a file parses as well-formed XML.

    usage: checkWellFormedness(filename)
    arguments:
    "filename": path of the file to run through the expat parser
    "error": when anything other than False, a diagnostic is written to
             stderr if the file cannot be opened or parsed
    returns: True when the whole file parses, False on any failure
    """
    import xml.parsers.expat
    parser = xml.parsers.expat.ParserCreate()
    try:
        # Binary mode lets expat honour the encoding declared in the
        # document (and is required by ParseFile on Python 3); the
        # with-block closes the handle even when parsing fails.
        with open(filename, "rb") as handle:
            parser.ParseFile(handle)
    except Exception as e:
        # Deliberately broad: an unreadable file and a malformed document
        # both mean "not usable XML" to the callers.
        if error is not False:
            sys.stderr.write("checkWellFormedness: %s: %s;\n" % (filename, e))
        return False
    return True
38
+
39
def getPronomSignature(type):
    """
    Ask the PRONOM SOAP service for the latest signature file or its version.

    usage: getPronomSignature(version|file)
    arguments:
    "version": returns latest signature file version number as int
    "file": returns latest signature XML file as string
    upon error: writes to stderr and returns False (the process is never
                terminated from inside this function)

    The parameter name shadows the builtin ``type``; it is kept unchanged so
    existing callers keep working.
    """
    try:
        import os
        import re
        import tempfile
        try:
            import httplib  # Python 2
        except ImportError:
            import http.client as httplib  # Python 3

        # SOAP operation to invoke for each supported request type.
        operations = {
            "version": "getSignatureFileVersionV1",
            "file": "getSignatureFileV1",
        }
        if type not in operations:
            sys.stderr.write("getPronomSignature(): unknown type: %s\n" % (type,))
            return False
        operation = operations[type]
        soapStr = ('<?xml version="1.0" encoding="utf-8"?>'
                   '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                   'xmlns:xsd="http://www.w3.org/2001/XMLSchema" '
                   'xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
                   '<soap:Body><' + operation + ' xmlns="http://pronom.nationalarchives.gov.uk" />'
                   '</soap:Body></soap:Envelope>')
        soapAction = '"http://pronom.nationalarchives.gov.uk:' + operation + 'In"'
        # The envelope is pure ASCII; encode only when it is not bytes yet
        # (Python 3) so that Content-length counts bytes.
        payload = soapStr if isinstance(soapStr, bytes) else soapStr.encode("utf-8")
        headers = {
            "Host": "www.nationalarchives.gov.uk",
            "User-Agent": "PRONOM UTILS v{0} (OPF)".format(__pronomutils__['version']),
            "Content-type": "text/xml; charset=\"UTF-8\"",
            "Content-length": "%d" % len(payload),
            "SOAPAction": soapAction,
        }

        webservice = httplib.HTTPConnection("apps.nationalarchives.gov.uk")
        try:
            webservice.request("POST", "/pronom/service.asmx", payload, headers)
            response = webservice.getresponse()
            statuscode = response.status
            statusmessage = response.reason
            body = response.read()
        except Exception as e:
            sys.stderr.write("getPronomSignature(): failed to contact PRONOM;\n%s\n" % (e))
            return False
        finally:
            webservice.close()

        if statuscode != 200:
            sys.stderr.write("getPronomSignature(): webservice error: '"+str(statuscode)+" "+statusmessage+"'\n")
            return False
        # Python 3 hands back bytes; the regexes and the return value are text.
        if not isinstance(body, str):
            body = body.decode("utf-8")

        if type == "version":
            found = re.search(r"<Version>([0-9]{1,4})</Version>", body)
            if found is None:
                sys.stderr.write("getPronomSignature(): could not parse VERSION from SOAP response: "+type+"\n")
                return False
            return int(found.group(1))

        # type == "file"
        found = re.search(r"<SignatureFile>.*</SignatureFile>", body)
        if found is None:
            sys.stderr.write("getPronomSignature(): could not parse XML from SOAP response: "+type+"\n")
            return False
        document = """<?xml version="1.0" encoding="UTF-8"?>"""+"\n"+found.group(0)
        # checkWellFormedness() works on a path, so spool the document to a
        # private scratch file that is removed whatever happens.
        handle, tmpfile = tempfile.mkstemp(prefix="tmp_getPronomSignature", suffix=".xml")
        try:
            with os.fdopen(handle, 'wb') as tmp:
                tmp.write(document if isinstance(document, bytes) else document.encode("utf-8"))
            wellFormed = checkWellFormedness(tmpfile)
        finally:
            os.unlink(tmpfile)
        if not wellFormed:
            sys.stderr.write("getPronomSignature(): signaturefile not well formed\n")
            return False
        return document
    except Exception as e:
        sys.stderr.write("getPronomSignature(): unknown error: "+str(e)+"\n")
        return False
@@ -0,0 +1,52 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # FIDO csv output to XML
5
+ # Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, September 2011
6
+ #
7
+ # Usage in combination with FIDO:
8
+ # Windows: python fido.py [ARGS] | python toxml.py > output.xml
9
+ # Linux: fido.py [ARGS] | toxml.py > output.xml
10
+ #
11
+ # Usage afterwards:
12
+ # Windows: type output.csv | toxml.py > output.xml
13
+ # Linux: cat output.csv | toxml.py > output.xml
14
+ #
15
+ # for difference in usage, see:
16
+ # http://bugs.python.org/issue9390
17
+ # http://support.microsoft.com/default.aspx?kbid=321788
18
+ #
19
+
20
import sys
import csv
import string
from xml.sax.saxutils import escape

# define FIDO version
fidoVersion = '1.0'
# define PRONOM signature version
signatureVersion = '56'

# Output element name paired with the FIDO CSV column it is taken from,
# listed in the order the elements are written.
_COLUMNS = (
    ('filename', 6),
    ('status', 0),
    ('matchtype', 8),
    ('time', 1),
    ('puid', 2),
    ('mimetype', 7),
    ('formatname', 3),
    ('signaturename', 4),
    ('filesize', 5),
)


def main(source=None, sink=None):
    """
    Convert FIDO CSV output into a <fido_output> XML document.

    arguments:
    "source": iterable of CSV lines to read; defaults to sys.stdin
    "sink": object with a write() method receiving the XML; defaults to
            sys.stdout

    Every value is XML-escaped, so file names containing "&", "<" or ">"
    no longer produce a malformed document. Blank input lines are skipped
    and rows with fewer columns than expected are padded with empty values.
    """
    if source is None:
        source = sys.stdin
    if sink is None:
        sink = sys.stdout

    sink.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
<versions>
<fido_version>{0}</fido_version>
<signature_version>{1}</signature_version>
</versions>""".format(fidoVersion,signatureVersion))

    width = max(index for _, index in _COLUMNS) + 1
    for row in csv.reader(source):
        if not row:
            # csv yields an empty list for a blank line
            continue
        cells = list(row) + [''] * (width - len(row))
        elements = ["<{0}>{1}</{0}>".format(tag, escape(cells[index]))
                    for tag, index in _COLUMNS]
        sink.write("\n<file>\n" + "\n".join(elements) + "\n</file>")

    sink.write("\n</fido_output>\n")


if __name__ == '__main__':
    main()
@@ -0,0 +1,171 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # FIDO SIGNATURE UPDATER
5
+ #
6
+ # Open Planets Foundation (http://www.openplanetsfoundation.org)
7
+ # See License.txt for license information.
8
+ # Download from: https://github.com/openplanets/fido/releases
9
+ # Author: Maurice de Rooij (NANETH), 2012
10
+ #
11
+ # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
12
+ # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
13
+ #
14
+ import sys, os, urllib, time, zipfile, shutil
15
+
16
+ from xml.etree import ElementTree as CET
17
+ from xml.etree import ElementTree as VET
18
+ from pronomutils import getPronomSignature, checkWellFormedness
19
+ import prepare
20
+
21
# Settings consumed by main(). The "{0}" placeholders in the file-name
# templates receive the PRONOM signature version number.
defaults = {
    # version of this update script, recorded in versions.xml as <updateScript>
    'version': '1.2.2',
    # folder holding the signature files, next to this script
    'conf_dir': os.path.join(os.path.dirname(__file__), 'conf'),
    # scratch sub-folder of conf_dir used while downloading PUID records
    'tmp_dir': 'tmp',
    'signatureFileName' : 'DROID_SignatureFile-v{0}.xml',
    'pronomZipFileName' : 'pronom-xml-v{0}.zip',
    'fidoSignatureVersion' : 'format_extensions.xml',
    'versionsFileName' : 'versions.xml',
    'http_throttle' : 0.5, # in secs, to prevent DoS of PRONOM server
    # container version is frozen and needs human attention before updating;
    # it has to name the container signature file that is shipped in conf_dir,
    # because main() copies this value into versions.xml
    'containerVersion' : 'container-signature-20150307.xml',
    # template for versions.xml: {0} PRONOM version, {1} formats file,
    # {2} container signature, {3} FIDO extension signature, {4} update script version
    'versionXML' : """<?xml version="1.0" encoding="UTF-8"?>\n<versions>\n\t<pronomVersion>{0}</pronomVersion>\n\t<pronomSignature>{1}</pronomSignature>\n\t<pronomContainerSignature>{2}</pronomContainerSignature>\n\t<fidoExtensionSignature>{3}</fidoExtensionSignature>\n\t<updateScript>{4}</updateScript>\n</versions>"""
    }
33
+
34
def _puidFileName(puid):
    """Return the local file name ("puid.<type>.<number>.xml") for a PUID such as "fmt/18"."""
    puidType, puidNum = puid.split("/")
    return "puid."+puidType+"."+puidNum+".xml"


def _asBytes(text):
    """Return text as UTF-8 bytes so it can be written to a file opened in binary mode."""
    return text if isinstance(text, bytes) else text.encode("utf-8")


def main(defaults):
    """
    Updates PRONOM signatures
    Interactive script, requires keyboard input

    arguments:
    "defaults": settings dictionary; the keys read here are 'version',
                'conf_dir', 'tmp_dir', 'signatureFileName',
                'pronomZipFileName', 'fidoSignatureVersion',
                'versionsFileName', 'http_throttle', 'containerVersion'
                and 'versionXML'

    Steps: query the current signature version, download the signature
    file, download one XML record per PUID into a scratch folder, pack the
    records into a zip, rewrite versions.xml and run prepare.main().
    The function always ends through sys.exit(); Ctrl-C aborts cleanly.
    """
    # Names that differ between Python 2 and Python 3.
    try:
        ask_user = raw_input
    except NameError:
        ask_user = input
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    try:
        resume_download = False
        answers = ['y','yes']
        confDir = os.path.abspath(defaults['conf_dir'])
        # {0} and {1} are re-inserted as placeholders and filled in once the
        # signature version is known.
        versionXML = defaults['versionXML'].format("{0}","{1}",defaults['containerVersion'],defaults['fidoSignatureVersion'],defaults['version'])
        print("FIDO signature updater v"+defaults['version'])
        print("Contacting PRONOM...")
        currentVersion = getPronomSignature("version")
        if currentVersion is False:
            print("Failed to obtain PRONOM signature file version number, please try again")
            sys.exit()
        print("Querying latest signaturefile version...")
        signatureFile = os.path.join(confDir, defaults['signatureFileName'].format(currentVersion))
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version "+str(currentVersion))
            ask = ask_user("Update anyway? (yes/no): ")
            if ask.lower() not in answers:
                sys.exit()
        print("Downloading signature file version "+str(currentVersion)+"...")
        currentFile = getPronomSignature("file")
        if currentFile is False:
            print("Failed to obtain PRONOM signature file, please try again")
            sys.exit()
        print("Writing {0}...".format(defaults['signatureFileName'].format(currentVersion)))
        with open(signatureFile,'wb') as sigfile:
            sigfile.write(_asBytes(currentFile))
        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        puids = []
        for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat"):
            puid = node.get("PUID")
            if puid:
                # a FileFormat without a PUID attribute cannot be downloaded
                puids.append(puid)
        numberPuids = len(puids)
        print("Found "+str(numberPuids)+" PRONOM PUID's")
        print("Downloading signatures can take a while")
        ask = ask_user("Continue and download signatures? (yes/no): ")
        if ask.lower() not in answers:
            print("Aborting update...")
            sys.exit()

        tmpdir = os.path.join(confDir, defaults['tmp_dir'])
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download: "+tmpdir)
            ask = ask_user("Resume download (yes) or start over (no)?: ")
            if ask.lower() in answers:
                print("Resuming download...")
                resume_download = True
        else:
            print("Creating temporary folder for download: "+tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # handled right below through the isdir() check
                pass
        # Remember whether a dedicated scratch folder exists: only that
        # folder may be wiped afterwards, never conf_dir itself.
        dedicatedTmpdir = os.path.isdir(tmpdir)
        if not dedicatedTmpdir:
            tmpdir = confDir
            print("Failed to create temporary folder for PUID's, using "+tmpdir)

        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        for puid in puids:
            filename = os.path.join(tmpdir, _puidFileName(puid))
            if resume_download and os.path.isfile(filename) and checkWellFormedness(filename):
                # already fetched by an earlier, interrupted run
                numfiles += 1
                continue
            puidUrl = "http://www.nationalarchives.gov.uk/pronom/"+puid+".xml"
            try:
                filehandle = urlopen(puidUrl)
                try:
                    with open(filename,'wb') as puidfile:
                        shutil.copyfileobj(filehandle, puidfile)
                finally:
                    filehandle.close()
            except Exception as e:
                print("Failed to download signaturefile: "+puidUrl)
                print("Error: "+str(e))
                print("Please restart and resume download")
                sys.exit()
            if not checkWellFormedness(filename):
                # discard broken downloads so they never reach the zip
                os.unlink(filename)
                continue
            numfiles += 1
            percent = int(float(numfiles) / one_percent)
            sys.stdout.write("\r"+str(percent)+"%")
            sys.stdout.flush()
            time.sleep(defaults['http_throttle'])
        sys.stdout.write("\r100%\n")

        try:
            import zlib
            compression = zipfile.ZIP_DEFLATED
        except ImportError:
            # zlib is optional; fall back to an uncompressed archive
            compression = zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        print("Creating PRONOM zip, adding files with compression mode '"+modes[compression]+"'")
        zf = zipfile.ZipFile(os.path.join(confDir, defaults['pronomZipFileName'].format(currentVersion)), mode='w')
        try:
            for puid in puids:
                puidFileName = _puidFileName(puid)
                # read from the folder the files were really downloaded to
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    os.unlink(filename)
        finally:
            zf.close()

        if dedicatedTmpdir:
            print("Deleting temporary folder and files...")
            # best effort, a leftover scratch folder is harmless
            shutil.rmtree(tmpdir, ignore_errors=True)

        # update versions.xml
        versionsFile = os.path.join(confDir, defaults['versionsFileName'])
        print("Updating {0}...".format(defaults['versionsFileName']))
        with open(versionsFile,'wb') as xmlversionsfile:
            xmlversionsfile.write(_asBytes(versionXML.format(str(currentVersion),"formats-v"+str(currentVersion)+".xml")))
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        # there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        prepare.main()
        print("FIDO signatures successfully updated")
        sys.exit()
    except KeyboardInterrupt:
        print("\nAborting update")
        sys.exit()


if __name__ == '__main__':
    main(defaults)
Binary file