libis-format 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +15 -0
  4. data/.travis.yml +36 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +38 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/data/ISOcoated.icc +0 -0
  12. data/data/PDFA_def.ps +32 -0
  13. data/data/ead.xsd +2728 -0
  14. data/data/lias_formats.xml +106 -0
  15. data/data/types.yml +213 -0
  16. data/lib/libis/format/converter/base.rb +103 -0
  17. data/lib/libis/format/converter/chain.rb +80 -0
  18. data/lib/libis/format/converter/repository.rb +110 -0
  19. data/lib/libis/format/converter.rb +11 -0
  20. data/lib/libis/format/droid.rb +38 -0
  21. data/lib/libis/format/fido.rb +109 -0
  22. data/lib/libis/format/identifier.rb +185 -0
  23. data/lib/libis/format/type_database.rb +170 -0
  24. data/lib/libis/format/version.rb +5 -0
  25. data/lib/libis/format.rb +12 -0
  26. data/lib/libis-format.rb +1 -0
  27. data/libis-format.gemspec +30 -0
  28. data/spec/data/Cevennes2.bmp +0 -0
  29. data/spec/data/Cevennes2.jp2 +0 -0
  30. data/spec/data/Cevennes2.ppm +22492 -0
  31. data/spec/data/test-ead.xml +392 -0
  32. data/spec/data/test-jpg.tif +0 -0
  33. data/spec/data/test-lzw.tif +0 -0
  34. data/spec/data/test.bmp +0 -0
  35. data/spec/data/test.doc +0 -0
  36. data/spec/data/test.docx +0 -0
  37. data/spec/data/test.gif +0 -0
  38. data/spec/data/test.ods +0 -0
  39. data/spec/data/test.odt +0 -0
  40. data/spec/data/test.pdf +0 -0
  41. data/spec/data/test.png +0 -0
  42. data/spec/data/test.ps +8631 -0
  43. data/spec/data/test.psd +0 -0
  44. data/spec/data/test.rtf +1455 -0
  45. data/spec/data/test.tif +0 -0
  46. data/spec/data/test.txt +12 -0
  47. data/spec/data/test.xcf +0 -0
  48. data/spec/data/test.xls +0 -0
  49. data/spec/data/test.xlsx +0 -0
  50. data/spec/data/test.xml +4 -0
  51. data/spec/identifier_spec.rb +59 -0
  52. data/spec/spec_helper.rb +9 -0
  53. data/spec/test_types.yml +12 -0
  54. data/spec/type_database_spec.rb +140 -0
  55. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  56. data/tools/droid/container-signature-20150307.xml +2235 -0
  57. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  58. data/tools/droid/droid.bat +154 -0
  59. data/tools/droid/droid.sh +138 -0
  60. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  61. data/tools/droid/lib/activation-1.1.jar +0 -0
  62. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  63. data/tools/droid/lib/antlr-3.2.jar +0 -0
  64. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  65. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  66. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  67. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  68. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  69. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  70. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  71. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  72. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  73. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  74. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  75. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  76. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  77. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  78. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  79. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  80. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  81. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  82. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  83. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  84. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  85. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  86. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  87. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  88. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  89. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  90. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  91. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  92. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  93. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  94. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  95. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  96. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  97. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  98. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  99. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  100. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  101. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  102. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  103. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  104. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  105. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  106. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  107. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  108. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  109. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  110. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  111. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  112. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  113. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  114. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  115. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  116. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  117. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  118. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  119. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  120. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  121. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  122. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  123. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  124. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  125. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  126. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  127. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  128. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  129. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  130. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  131. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  132. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  133. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  134. data/tools/droid/lib/jta-1.1.jar +0 -0
  135. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  136. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  137. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  138. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  139. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  140. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  141. data/tools/droid/lib/poi-3.7.jar +0 -0
  142. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  143. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  144. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  145. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  146. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  147. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  148. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  149. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  150. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  151. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  152. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  153. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  154. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  155. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  156. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  157. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  158. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  159. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  160. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  161. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  162. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  163. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  164. data/tools/droid/lib/xz-1.0.jar +0 -0
  165. data/tools/fido/__init__.py +0 -0
  166. data/tools/fido/argparselocal.py +2355 -0
  167. data/tools/fido/argparselocal.pyc +0 -0
  168. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  169. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  170. data/tools/fido/conf/dc.xsd +119 -0
  171. data/tools/fido/conf/dcmitype.xsd +53 -0
  172. data/tools/fido/conf/dcterms.xsd +383 -0
  173. data/tools/fido/conf/fido-formats.xsd +173 -0
  174. data/tools/fido/conf/format_extension_template.xml +105 -0
  175. data/tools/fido/conf/format_extensions.xml +498 -0
  176. data/tools/fido/conf/formats-v81.xml +38355 -0
  177. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  178. data/tools/fido/conf/versions.xml +8 -0
  179. data/tools/fido/fido.bat +4 -0
  180. data/tools/fido/fido.py +854 -0
  181. data/tools/fido/fido.sh +5 -0
  182. data/tools/fido/prepare.py +616 -0
  183. data/tools/fido/pronomutils.py +115 -0
  184. data/tools/fido/toxml.py +52 -0
  185. data/tools/fido/update_signatures.py +171 -0
  186. metadata +342 -0
@@ -0,0 +1,115 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # PRONOM UTILS
4
+ #
5
+ # PYTHON FUNCTION TO QUERY PRONOM VERSION
6
+ # AND DOWNLOAD SIGNATUREFILE
7
+ # USES PRONOM SOAP SERVICE
8
+ #
9
+ # Open Planets Foundation (http://www.openplanetsfoundation.org)
10
+ # See License.txt for license information.
11
+ # Download from: http://github.com/openplanets/fido/downloads
12
+ # Author: Maurice de Rooij (OPF/NANETH), 2012
13
+ #
14
+ # PRONOM UTILS is a library used by FIDO
15
+ # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
16
+ # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
17
+ #
18
+ import sys
19
+ from xml.dom import minidom
20
+ __pronomutils__ = {'version' : '1.0.1'}
21
+
22
def checkWellFormedness(filename, error=False):
    """
    Report whether a file can be parsed as well-formed XML.

    usage: checkWellFormedness(filename)
    arguments:
        "filename": path of the file to check
        "error": when anything other than False, a diagnostic is written
                 to stderr if the file cannot be opened or parsed
    returns:
        True if expat parses the whole file without raising, False otherwise
        (this includes a file that does not exist or cannot be read)
    """
    import xml.parsers.expat
    parser = xml.parsers.expat.ParserCreate()
    try:
        # Binary mode hands expat the raw bytes so it can honour the encoding
        # declared in the document; the context manager closes the handle
        # even when parsing fails.
        with open(filename, "rb") as handle:
            parser.ParseFile(handle)
    except Exception as e:
        if error is not False:
            sys.stderr.write("checkWellFormedness: %s: %s;\n" % (filename, e))
        return False
    return True
38
+
39
def getPronomSignature(type):
    """
    Query the PRONOM SOAP service for the latest signature file.

    usage: getPronomSignature(version|file)
    arguments:
        "version": returns latest signature file version number as int
        "file": returns latest signature XML file as string
    upon error: writes to stderr and returns False
    """
    # SOAP operation to invoke for each supported request type
    operations = {
        "version": "getSignatureFileVersionV1",
        "file": "getSignatureFileV1",
    }
    try:
        # Validate before touching the network so a bad argument is always
        # reported as such.
        if type not in operations:
            sys.stderr.write("getPronomSignature(): unknown type: %s\n" % (type,))
            return False
        import httplib
        import re
        import os
        import tempfile
        operation = operations[type]
        soapStr = ('<?xml version="1.0" encoding="utf-8"?>'
                   '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                   'xmlns:xsd="http://www.w3.org/2001/XMLSchema" '
                   'xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
                   '<soap:Body><%s xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body>'
                   '</soap:Envelope>') % operation
        # The SOAPAction header value is sent including the double quotes
        soapAction = '"http://pronom.nationalarchives.gov.uk:%sIn"' % operation
        webservice = httplib.HTTP("apps.nationalarchives.gov.uk")
        webservice.putrequest("POST", "/pronom/service.asmx")
        webservice.putheader("Host", "www.nationalarchives.gov.uk")
        webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__pronomutils__['version']))
        webservice.putheader("Content-type", "text/xml; charset=\"UTF-8\"")
        webservice.putheader("Content-length", "%d" % len(soapStr))
        webservice.putheader("SOAPAction", soapAction)
        try:
            webservice.endheaders()
        except Exception as e:
            # Honour the documented contract (return False) instead of
            # terminating the caller's process from inside a library function.
            sys.stderr.write("getPronomSignature(): failed to contact PRONOM;\n%s\n" % (e))
            return False
        webservice.send(soapStr)
        statuscode, statusmessage, header = webservice.getreply()
        if statuscode != 200:
            sys.stderr.write("getPronomSignature(): webservice error: '%s %s'\n" % (statuscode, statusmessage))
            return False
        response = webservice.getfile().read()
        if type == "version":
            found = re.search(r"<Version>([0-9]{1,4})</Version>", response)
            if found is None:
                sys.stderr.write("getPronomSignature(): could not parse VERSION from SOAP response: "+type+"\n")
                return False
            return int(found.group(1))
        # type == "file"; DOTALL lets the element span several lines
        found = re.search(r"<SignatureFile>.*</SignatureFile>", response, re.DOTALL)
        sigtxt = found.group(0) if found else ''
        if len(sigtxt) == 0:
            sys.stderr.write("getPronomSignature(): could not parse XML from SOAP response: "+type+"\n")
            return False
        signature = """<?xml version="1.0" encoding="UTF-8"?>"""+"\n"+sigtxt
        # Check well-formedness through a uniquely named temporary file that
        # is removed whatever happens.
        fd, tmpfile = tempfile.mkstemp(prefix="tmp_getPronomSignature", suffix=".xml")
        try:
            tmp = os.fdopen(fd, 'wb')
            try:
                tmp.write(signature)
            finally:
                tmp.close()
            wellformed = checkWellFormedness(tmpfile)
        finally:
            os.unlink(tmpfile)
        if not wellformed:
            sys.stderr.write("getPronomSignature(): signaturefile not well formed\n")
            return False
        return signature
    except Exception as e:
        sys.stderr.write("getPronomSignature(): unknown error: "+str(e)+"\n")
        return False
@@ -0,0 +1,52 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # FIDO csv output to XML
5
+ # Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, September 2011
6
+ #
7
+ # Usage in combination with FIDO:
8
+ # Windows: python fido.py [ARGS] | python toxml.py > output.xml
9
+ # Linux: fido.py [ARGS] | toxml.py > output.xml
10
+ #
11
+ # Usage afterwards:
12
+ # Windows: type output.csv | toxml.py > output.xml
13
+ # Linux: cat output.csv | toxml.py > output.xml
14
+ #
15
+ # for difference in usage, see:
16
+ # http://bugs.python.org/issue9390
17
+ # http://support.microsoft.com/default.aspx?kbid=321788
18
+ #
19
+
20
+ import sys
21
+ import csv
22
+ import string
23
+
24
# Values read from the CSV are escaped before being embedded in the XML output
from xml.sax.saxutils import escape

# define FIDO version
fidoVersion = '1.0'
# define PRONOM signature version
signatureVersion = '56'

# Document header: XML declaration, root element and the version block
sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
<versions>
<fido_version>{0}</fido_version>
<signature_version>{1}</signature_version>
</versions>""".format(fidoVersion,signatureVersion))

reader = csv.reader(sys.stdin)

# One <file> element per CSV row read from stdin
for row in reader:
    if not row:
        # csv.reader yields an empty list for a blank input line
        continue
    # Escape &, < and > so that e.g. a file name containing '&' cannot
    # produce malformed XML.
    row = [escape(field) for field in row]
    sys.stdout.write("""
<file>
<filename>{0}</filename>
<status>{1}</status>
<matchtype>{2}</matchtype>
<time>{3}</time>
<puid>{4}</puid>
<mimetype>{5}</mimetype>
<formatname>{6}</formatname>
<signaturename>{7}</signaturename>
<filesize>{8}</filesize>
</file>""".format(row[6],row[0],row[8],row[1],row[2],row[7],row[3],row[4],row[5]))

sys.stdout.write("\n</fido_output>\n")
@@ -0,0 +1,171 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # FIDO SIGNATURE UPDATER
5
+ #
6
+ # Open Planets Foundation (http://www.openplanetsfoundation.org)
7
+ # See License.txt for license information.
8
+ # Download from: https://github.com/openplanets/fido/releases
9
+ # Author: Maurice de Rooij (NANETH), 2012
10
+ #
11
+ # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
12
+ # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
13
+ #
14
+ import sys, os, urllib, time, zipfile, shutil
15
+
16
+ from xml.etree import ElementTree as CET
17
+ from xml.etree import ElementTree as VET
18
+ from pronomutils import getPronomSignature, checkWellFormedness
19
+ import prepare
20
+
21
# Settings handed to main() by the __main__ guard of this script.
defaults = dict(
    # version of this update script; written to versions.xml as <updateScript>
    version='1.2.2',
    # directory that receives the signature file, the PRONOM zip and versions.xml
    conf_dir=os.path.join(os.path.dirname(__file__), 'conf'),
    # name of the download folder created underneath conf_dir
    tmp_dir='tmp',
    # file name templates; {0} is replaced by the PRONOM signature version
    signatureFileName='DROID_SignatureFile-v{0}.xml',
    pronomZipFileName='pronom-xml-v{0}.zip',
    # written to versions.xml as <fidoExtensionSignature>
    fidoSignatureVersion='format_extensions.xml',
    versionsFileName='versions.xml',
    # in secs, to prevent DoS of PRONOM server
    http_throttle=0.5,
    # container version is frozen and needs human attention before updating
    # NOTE(review): the conf/ folder of this package ships
    # container-signature-20150307.xml - confirm this name is still intended
    containerVersion='container-signature-20130501.xml',
    # template for versions.xml
    versionXML=(
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<versions>\n'
        '\t<pronomVersion>{0}</pronomVersion>\n'
        '\t<pronomSignature>{1}</pronomSignature>\n'
        '\t<pronomContainerSignature>{2}</pronomContainerSignature>\n'
        '\t<fidoExtensionSignature>{3}</fidoExtensionSignature>\n'
        '\t<updateScript>{4}</updateScript>\n'
        '</versions>'
    ),
)
33
+
34
def _puid_file_name(puid):
    """Return the local file name for a PUID, e.g. 'fmt/18' -> 'puid.fmt.18.xml'."""
    puidType, puidNum = puid.split("/")
    return "puid."+puidType+"."+puidNum+".xml"

def main(defaults):
    """
    Updates PRONOM signatures
    Interactive script, requires keyboard input

    Steps, in order: ask PRONOM for the latest signature version, download
    the signature file into the conf directory, download one XML file per
    PUID listed in it, pack those files into the PRONOM zip, rewrite
    versions.xml and finally run prepare.main().

    arguments:
        "defaults": dict of settings; the keys read here are 'version',
                    'conf_dir', 'tmp_dir', 'signatureFileName',
                    'pronomZipFileName', 'fidoSignatureVersion',
                    'versionsFileName', 'http_throttle', 'containerVersion'
                    and 'versionXML'

    Ends the process through sys.exit() on success, when the user declines
    to continue, when a download fails and on Ctrl-C.
    """
    try:
        resume_download = False
        answers = ['y','yes']
        conf_dir = os.path.abspath(defaults['conf_dir'])
        # Slots {0} and {1} are kept as placeholders; they are filled in once
        # the PRONOM version is known.
        versionXML = defaults['versionXML'].format("{0}","{1}",defaults['containerVersion'],defaults['fidoSignatureVersion'],defaults['version'])
        print("FIDO signature updater v"+defaults['version'])
        print("Contacting PRONOM...")
        currentVersion = getPronomSignature("version")
        if currentVersion is False:
            print("Failed to obtain PRONOM signature file version number, please try again")
            sys.exit()
        print("Querying latest signaturefile version...")
        signatureFileName = defaults['signatureFileName'].format(currentVersion)
        signatureFile = os.path.join(conf_dir, signatureFileName)
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version "+str(currentVersion))
            ask = raw_input("Update anyway? (yes/no): ")
            if ask.lower() not in answers:
                sys.exit()
        print("Downloading signature file version "+str(currentVersion)+"...")
        currentFile = getPronomSignature("file")
        if currentFile is False:
            print("Failed to obtain PRONOM signature file, please try again")
            sys.exit()
        print("Writing {0}...".format(signatureFileName))
        with open(signatureFile,'wb') as sigfile:
            sigfile.write(currentFile)
        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        # A FileFormat node without a PUID has nothing that could be downloaded
        puids = [node.get("PUID")
                 for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat")
                 if node.get("PUID")]
        numberPuids = len(puids)
        print("Found "+str(numberPuids)+" PRONOM PUID's")
        print("Downloading signatures can take a while")
        ask = raw_input("Continue and download signatures? (yes/no): ")
        if ask.lower() not in answers:
            print("Aborting update...")
            sys.exit()
        tmpdir = os.path.join(conf_dir, defaults['tmp_dir'])
        # True while tmpdir is a folder of our own that may be deleted afterwards
        private_tmpdir = True
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download: "+tmpdir)
            ask = raw_input("Resume download (yes) or start over (no)?: ")
            if ask.lower() in answers:
                print("Resuming download...")
                resume_download = True
        else:
            print("Creating temporary folder for download: "+tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # handled by the isdir() check below
                pass
        if not os.path.isdir(tmpdir):
            tmpdir = conf_dir
            # Downloads now share the conf folder, which must never be wiped
            private_tmpdir = False
            print("Failed to create temporary folder for PUID's, using "+tmpdir)
        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        for puid in puids:
            filename = os.path.join(tmpdir, _puid_file_name(puid))
            if resume_download and os.path.isfile(filename) and checkWellFormedness(filename):
                # already fetched by an earlier, interrupted run
                numfiles += 1
                continue
            puidUrl = "http://www.nationalarchives.gov.uk/pronom/"+puid+".xml"
            try:
                filehandle = urllib.urlopen(puidUrl)
            except Exception as e:
                print("Failed to download signaturefile: "+puidUrl)
                print("Error: "+str(e))
                print("Please restart and resume download")
                sys.exit()
            try:
                with open(filename,'wb') as puidfile:
                    shutil.copyfileobj(filehandle, puidfile)
            finally:
                filehandle.close()
            if checkWellFormedness(filename):
                numfiles += 1
                percent = int(float(numfiles) / one_percent)
                sys.stdout.write("\r"+str(percent)+"%")
                sys.stdout.flush()
            else:
                # discard anything that is not XML (e.g. an HTML error page)
                os.unlink(filename)
            # throttle after every request, including failed ones
            time.sleep(defaults['http_throttle'])
        sys.stdout.write("\r100%\n")
        try:
            import zlib
            compression = zipfile.ZIP_DEFLATED
        except ImportError:
            compression = zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        print("Creating PRONOM zip, adding files with compression mode '"+modes[compression]+"'")
        zf = zipfile.ZipFile(os.path.join(conf_dir, defaults['pronomZipFileName'].format(currentVersion)), mode='w')
        try:
            for puid in puids:
                puidFileName = _puid_file_name(puid)
                # read from the folder the files were actually downloaded to
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    os.unlink(filename)
        finally:
            zf.close()
        print("Deleting temporary folder and files...")
        if private_tmpdir:
            # best effort; never applied to the conf folder itself
            shutil.rmtree(tmpdir, ignore_errors=True)
        # update versions.xml
        versionsFile = os.path.join(conf_dir, defaults['versionsFileName'])
        print("Updating {0}...".format(defaults['versionsFileName']))
        with open(versionsFile,'wb') as xmlversionsfile:
            xmlversionsfile.write(versionXML.format(str(currentVersion),"formats-v"+str(currentVersion)+".xml"))
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        # there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        prepare.main()
        print("FIDO signatures successfully updated")
        sys.exit()
    except KeyboardInterrupt:
        print("\nAborting update")
        sys.exit()

if __name__ == '__main__':
    main(defaults)
metadata ADDED
@@ -0,0 +1,342 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: libis-format
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.1
5
+ platform: ruby
6
+ authors:
7
+ - Kris Dekeyser
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: simplecov
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.9'
69
+ - !ruby/object:Gem::Dependency
70
+ name: libis-tools
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.9'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.9'
83
+ - !ruby/object:Gem::Dependency
84
+ name: os
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 0.9.6
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 0.9.6
97
+ description: Collection of tools and classes that help to identify formats of binary
98
+ files and create derivative copies (e.g. PDF from Word).
99
+ email:
100
+ - kris.dekeyser@libis.be
101
+ executables:
102
+ - droid
103
+ - fido
104
+ extensions: []
105
+ extra_rdoc_files: []
106
+ files:
107
+ - ".coveralls.yml"
108
+ - ".gitignore"
109
+ - ".travis.yml"
110
+ - Gemfile
111
+ - LICENSE.txt
112
+ - README.md
113
+ - Rakefile
114
+ - bin/droid
115
+ - bin/fido
116
+ - data/ISOcoated.icc
117
+ - data/PDFA_def.ps
118
+ - data/ead.xsd
119
+ - data/lias_formats.xml
120
+ - data/types.yml
121
+ - lib/libis-format.rb
122
+ - lib/libis/format.rb
123
+ - lib/libis/format/converter.rb
124
+ - lib/libis/format/converter/base.rb
125
+ - lib/libis/format/converter/chain.rb
126
+ - lib/libis/format/converter/repository.rb
127
+ - lib/libis/format/droid.rb
128
+ - lib/libis/format/fido.rb
129
+ - lib/libis/format/identifier.rb
130
+ - lib/libis/format/type_database.rb
131
+ - lib/libis/format/version.rb
132
+ - libis-format.gemspec
133
+ - spec/data/Cevennes2.bmp
134
+ - spec/data/Cevennes2.jp2
135
+ - spec/data/Cevennes2.ppm
136
+ - spec/data/test-ead.xml
137
+ - spec/data/test-jpg.tif
138
+ - spec/data/test-lzw.tif
139
+ - spec/data/test.bmp
140
+ - spec/data/test.doc
141
+ - spec/data/test.docx
142
+ - spec/data/test.gif
143
+ - spec/data/test.ods
144
+ - spec/data/test.odt
145
+ - spec/data/test.pdf
146
+ - spec/data/test.png
147
+ - spec/data/test.ps
148
+ - spec/data/test.psd
149
+ - spec/data/test.rtf
150
+ - spec/data/test.tif
151
+ - spec/data/test.txt
152
+ - spec/data/test.xcf
153
+ - spec/data/test.xls
154
+ - spec/data/test.xlsx
155
+ - spec/data/test.xml
156
+ - spec/identifier_spec.rb
157
+ - spec/spec_helper.rb
158
+ - spec/test_types.yml
159
+ - spec/type_database_spec.rb
160
+ - tools/droid/DROID_SignatureFile_V82.xml
161
+ - tools/droid/container-signature-20150307.xml
162
+ - tools/droid/droid-command-line-6.1.5.jar
163
+ - tools/droid/droid.bat
164
+ - tools/droid/droid.sh
165
+ - tools/droid/lib/XmlSchema-1.4.7.jar
166
+ - tools/droid/lib/activation-1.1.jar
167
+ - tools/droid/lib/antlr-2.7.7.jar
168
+ - tools/droid/lib/antlr-3.2.jar
169
+ - tools/droid/lib/antlr-runtime-3.2.jar
170
+ - tools/droid/lib/aopalliance-1.0.jar
171
+ - tools/droid/lib/asm-2.2.3.jar
172
+ - tools/droid/lib/aspectjrt-1.7.2.jar
173
+ - tools/droid/lib/aspectjweaver-1.7.2.jar
174
+ - tools/droid/lib/bcmail-jdk14-138.jar
175
+ - tools/droid/lib/bcprov-jdk14-138.jar
176
+ - tools/droid/lib/beansbinding-1.2.1.jar
177
+ - tools/droid/lib/byteseek-1.1.1.jar
178
+ - tools/droid/lib/cglib-nodep-2.2.2.jar
179
+ - tools/droid/lib/classmate-1.0.0.jar
180
+ - tools/droid/lib/commons-cli-1.2.jar
181
+ - tools/droid/lib/commons-codec-1.4.jar
182
+ - tools/droid/lib/commons-collections-3.2.1.jar
183
+ - tools/droid/lib/commons-compress-1.4.1.jar
184
+ - tools/droid/lib/commons-configuration-1.8.jar
185
+ - tools/droid/lib/commons-dbcp-1.4.jar
186
+ - tools/droid/lib/commons-httpclient-3.1.jar
187
+ - tools/droid/lib/commons-io-2.4.jar
188
+ - tools/droid/lib/commons-lang-2.6.jar
189
+ - tools/droid/lib/commons-logging-1.1.1.jar
190
+ - tools/droid/lib/commons-pool-1.5.4.jar
191
+ - tools/droid/lib/cxf-api-2.2.12.jar
192
+ - tools/droid/lib/cxf-common-schemas-2.2.12.jar
193
+ - tools/droid/lib/cxf-common-utilities-2.2.12.jar
194
+ - tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar
195
+ - tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar
196
+ - tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar
197
+ - tools/droid/lib/cxf-rt-core-2.2.12.jar
198
+ - tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar
199
+ - tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar
200
+ - tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar
201
+ - tools/droid/lib/cxf-rt-transports-http-2.2.12.jar
202
+ - tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar
203
+ - tools/droid/lib/cxf-tools-common-2.2.12.jar
204
+ - tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar
205
+ - tools/droid/lib/derby-10.10.2.0.jar
206
+ - tools/droid/lib/dom4j-1.6.1.jar
207
+ - tools/droid/lib/droid-container-6.1.5.jar
208
+ - tools/droid/lib/droid-core-6.1.5.jar
209
+ - tools/droid/lib/droid-core-interfaces-6.1.5.jar
210
+ - tools/droid/lib/droid-export-6.1.5.jar
211
+ - tools/droid/lib/droid-export-interfaces-6.1.5.jar
212
+ - tools/droid/lib/droid-help-6.1.5.jar
213
+ - tools/droid/lib/droid-report-6.1.5.jar
214
+ - tools/droid/lib/droid-report-interfaces-6.1.5.jar
215
+ - tools/droid/lib/droid-results-6.1.5.jar
216
+ - tools/droid/lib/ejb3-persistence-1.0.2.GA.jar
217
+ - tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar
218
+ - tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar
219
+ - tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar
220
+ - tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar
221
+ - tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar
222
+ - tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar
223
+ - tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar
224
+ - tools/droid/lib/hibernate-core-4.3.5.Final.jar
225
+ - tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar
226
+ - tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar
227
+ - tools/droid/lib/hibernate-validator-5.1.0.Final.jar
228
+ - tools/droid/lib/itext-2.0.8.jar
229
+ - tools/droid/lib/jandex-1.1.0.Final.jar
230
+ - tools/droid/lib/javahelp-2.0.05.jar
231
+ - tools/droid/lib/javassist-3.18.1-GA.jar
232
+ - tools/droid/lib/jaxb-api-2.1.jar
233
+ - tools/droid/lib/jaxb-impl-2.1.13.jar
234
+ - tools/droid/lib/jboss-logging-3.1.3.GA.jar
235
+ - tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar
236
+ - tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar
237
+ - tools/droid/lib/joda-time-1.6.2.jar
238
+ - tools/droid/lib/jra-1.0-alpha-4.jar
239
+ - tools/droid/lib/jta-1.1.jar
240
+ - tools/droid/lib/log4j-1.2.13.jar
241
+ - tools/droid/lib/neethi-2.0.4.jar
242
+ - tools/droid/lib/opencsv-2.3.jar
243
+ - tools/droid/lib/org-netbeans-swing-outline-7.2.jar
244
+ - tools/droid/lib/org-openide-util-7.2.jar
245
+ - tools/droid/lib/org-openide-util-lookup-7.2.jar
246
+ - tools/droid/lib/poi-3.7.jar
247
+ - tools/droid/lib/saaj-api-1.3.jar
248
+ - tools/droid/lib/saaj-impl-1.3.2.jar
249
+ - tools/droid/lib/slf4j-api-1.4.2.jar
250
+ - tools/droid/lib/slf4j-log4j12-1.4.2.jar
251
+ - tools/droid/lib/spring-aop-4.0.3.RELEASE.jar
252
+ - tools/droid/lib/spring-beans-4.0.3.RELEASE.jar
253
+ - tools/droid/lib/spring-context-4.0.3.RELEASE.jar
254
+ - tools/droid/lib/spring-core-4.0.3.RELEASE.jar
255
+ - tools/droid/lib/spring-expression-4.0.3.RELEASE.jar
256
+ - tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar
257
+ - tools/droid/lib/spring-orm-4.0.3.RELEASE.jar
258
+ - tools/droid/lib/spring-tx-4.0.3.RELEASE.jar
259
+ - tools/droid/lib/spring-web-2.5.6.jar
260
+ - tools/droid/lib/stax-api-1.0-2.jar
261
+ - tools/droid/lib/stringtemplate-3.2.jar
262
+ - tools/droid/lib/truezip-6.8.4.jar
263
+ - tools/droid/lib/validation-api-1.1.0.Final.jar
264
+ - tools/droid/lib/wsdl4j-1.6.2.jar
265
+ - tools/droid/lib/wstx-asl-3.2.9.jar
266
+ - tools/droid/lib/xercesImpl-2.9.1.jar
267
+ - tools/droid/lib/xml-apis-1.3.04.jar
268
+ - tools/droid/lib/xml-resolver-1.2.jar
269
+ - tools/droid/lib/xz-1.0.jar
270
+ - tools/fido/__init__.py
271
+ - tools/fido/argparselocal.py
272
+ - tools/fido/argparselocal.pyc
273
+ - tools/fido/conf/DROID_SignatureFile-v81.xml
274
+ - tools/fido/conf/container-signature-20150307.xml
275
+ - tools/fido/conf/dc.xsd
276
+ - tools/fido/conf/dcmitype.xsd
277
+ - tools/fido/conf/dcterms.xsd
278
+ - tools/fido/conf/fido-formats.xsd
279
+ - tools/fido/conf/format_extension_template.xml
280
+ - tools/fido/conf/format_extensions.xml
281
+ - tools/fido/conf/formats-v81.xml
282
+ - tools/fido/conf/pronom-xml-v81.zip
283
+ - tools/fido/conf/versions.xml
284
+ - tools/fido/fido.bat
285
+ - tools/fido/fido.py
286
+ - tools/fido/fido.sh
287
+ - tools/fido/prepare.py
288
+ - tools/fido/pronomutils.py
289
+ - tools/fido/toxml.py
290
+ - tools/fido/update_signatures.py
291
+ homepage: ''
292
+ licenses:
293
+ - MIT
294
+ metadata: {}
295
+ post_install_message:
296
+ rdoc_options: []
297
+ require_paths:
298
+ - lib
299
+ required_ruby_version: !ruby/object:Gem::Requirement
300
+ requirements:
301
+ - - ">="
302
+ - !ruby/object:Gem::Version
303
+ version: '0'
304
+ required_rubygems_version: !ruby/object:Gem::Requirement
305
+ requirements:
306
+ - - ">="
307
+ - !ruby/object:Gem::Version
308
+ version: '0'
309
+ requirements: []
310
+ rubyforge_project:
311
+ rubygems_version: 2.4.6
312
+ signing_key:
313
+ specification_version: 4
314
+ summary: LIBIS File format services.
315
+ test_files:
316
+ - spec/data/Cevennes2.bmp
317
+ - spec/data/Cevennes2.jp2
318
+ - spec/data/Cevennes2.ppm
319
+ - spec/data/test-ead.xml
320
+ - spec/data/test-jpg.tif
321
+ - spec/data/test-lzw.tif
322
+ - spec/data/test.bmp
323
+ - spec/data/test.doc
324
+ - spec/data/test.docx
325
+ - spec/data/test.gif
326
+ - spec/data/test.ods
327
+ - spec/data/test.odt
328
+ - spec/data/test.pdf
329
+ - spec/data/test.png
330
+ - spec/data/test.ps
331
+ - spec/data/test.psd
332
+ - spec/data/test.rtf
333
+ - spec/data/test.tif
334
+ - spec/data/test.txt
335
+ - spec/data/test.xcf
336
+ - spec/data/test.xls
337
+ - spec/data/test.xlsx
338
+ - spec/data/test.xml
339
+ - spec/identifier_spec.rb
340
+ - spec/spec_helper.rb
341
+ - spec/test_types.yml
342
+ - spec/type_database_spec.rb