libis-format 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +15 -0
  4. data/.travis.yml +36 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +38 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/data/ISOcoated.icc +0 -0
  12. data/data/PDFA_def.ps +32 -0
  13. data/data/ead.xsd +2728 -0
  14. data/data/lias_formats.xml +106 -0
  15. data/data/types.yml +213 -0
  16. data/lib/libis/format/converter/base.rb +103 -0
  17. data/lib/libis/format/converter/chain.rb +80 -0
  18. data/lib/libis/format/converter/repository.rb +110 -0
  19. data/lib/libis/format/converter.rb +11 -0
  20. data/lib/libis/format/droid.rb +38 -0
  21. data/lib/libis/format/fido.rb +109 -0
  22. data/lib/libis/format/identifier.rb +185 -0
  23. data/lib/libis/format/type_database.rb +170 -0
  24. data/lib/libis/format/version.rb +5 -0
  25. data/lib/libis/format.rb +12 -0
  26. data/lib/libis-format.rb +1 -0
  27. data/libis-format.gemspec +30 -0
  28. data/spec/data/Cevennes2.bmp +0 -0
  29. data/spec/data/Cevennes2.jp2 +0 -0
  30. data/spec/data/Cevennes2.ppm +22492 -0
  31. data/spec/data/test-ead.xml +392 -0
  32. data/spec/data/test-jpg.tif +0 -0
  33. data/spec/data/test-lzw.tif +0 -0
  34. data/spec/data/test.bmp +0 -0
  35. data/spec/data/test.doc +0 -0
  36. data/spec/data/test.docx +0 -0
  37. data/spec/data/test.gif +0 -0
  38. data/spec/data/test.ods +0 -0
  39. data/spec/data/test.odt +0 -0
  40. data/spec/data/test.pdf +0 -0
  41. data/spec/data/test.png +0 -0
  42. data/spec/data/test.ps +8631 -0
  43. data/spec/data/test.psd +0 -0
  44. data/spec/data/test.rtf +1455 -0
  45. data/spec/data/test.tif +0 -0
  46. data/spec/data/test.txt +12 -0
  47. data/spec/data/test.xcf +0 -0
  48. data/spec/data/test.xls +0 -0
  49. data/spec/data/test.xlsx +0 -0
  50. data/spec/data/test.xml +4 -0
  51. data/spec/identifier_spec.rb +59 -0
  52. data/spec/spec_helper.rb +9 -0
  53. data/spec/test_types.yml +12 -0
  54. data/spec/type_database_spec.rb +140 -0
  55. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  56. data/tools/droid/container-signature-20150307.xml +2235 -0
  57. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  58. data/tools/droid/droid.bat +154 -0
  59. data/tools/droid/droid.sh +138 -0
  60. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  61. data/tools/droid/lib/activation-1.1.jar +0 -0
  62. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  63. data/tools/droid/lib/antlr-3.2.jar +0 -0
  64. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  65. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  66. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  67. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  68. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  69. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  70. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  71. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  72. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  73. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  74. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  75. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  76. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  77. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  78. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  79. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  80. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  81. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  82. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  83. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  84. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  85. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  86. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  87. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  88. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  89. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  90. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  91. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  92. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  93. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  94. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  95. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  96. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  97. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  98. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  99. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  100. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  101. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  102. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  103. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  104. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  105. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  106. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  107. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  108. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  109. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  110. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  111. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  112. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  113. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  114. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  115. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  116. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  117. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  118. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  119. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  120. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  121. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  122. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  123. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  124. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  125. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  126. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  127. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  128. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  129. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  130. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  131. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  132. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  133. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  134. data/tools/droid/lib/jta-1.1.jar +0 -0
  135. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  136. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  137. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  138. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  139. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  140. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  141. data/tools/droid/lib/poi-3.7.jar +0 -0
  142. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  143. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  144. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  145. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  146. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  147. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  148. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  149. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  150. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  151. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  152. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  153. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  154. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  155. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  156. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  157. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  158. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  159. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  160. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  161. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  162. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  163. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  164. data/tools/droid/lib/xz-1.0.jar +0 -0
  165. data/tools/fido/__init__.py +0 -0
  166. data/tools/fido/argparselocal.py +2355 -0
  167. data/tools/fido/argparselocal.pyc +0 -0
  168. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  169. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  170. data/tools/fido/conf/dc.xsd +119 -0
  171. data/tools/fido/conf/dcmitype.xsd +53 -0
  172. data/tools/fido/conf/dcterms.xsd +383 -0
  173. data/tools/fido/conf/fido-formats.xsd +173 -0
  174. data/tools/fido/conf/format_extension_template.xml +105 -0
  175. data/tools/fido/conf/format_extensions.xml +498 -0
  176. data/tools/fido/conf/formats-v81.xml +38355 -0
  177. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  178. data/tools/fido/conf/versions.xml +8 -0
  179. data/tools/fido/fido.bat +4 -0
  180. data/tools/fido/fido.py +854 -0
  181. data/tools/fido/fido.sh +5 -0
  182. data/tools/fido/prepare.py +616 -0
  183. data/tools/fido/pronomutils.py +115 -0
  184. data/tools/fido/toxml.py +52 -0
  185. data/tools/fido/update_signatures.py +171 -0
  186. metadata +342 -0
@@ -0,0 +1,115 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # PRONOM UTILS
4
+ #
5
+ # PYTHON FUNCTION TO QUERY PRONOM VERSION
6
+ # AND DOWNLOAD SIGNATUREFILE
7
+ # USES PRONOM SOAP SERVICE
8
+ #
9
+ # Open Planets Foundation (http://www.openplanetsfoundation.org)
10
+ # See License.txt for license information.
11
+ # Download from: http://github.com/openplanets/fido/downloads
12
+ # Author: Maurice de Rooij (OPF/NANETH), 2012
13
+ #
14
+ # PRONOM UTILS is a library used by FIDO
15
+ # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
16
+ # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
17
+ #
18
+ import sys
19
+ from xml.dom import minidom
20
+ __pronomutils__ = {'version' : '1.0.1'}  # module version; interpolated into the User-Agent header sent to PRONOM
21
+
22
def checkWellFormedness(filename, error=False):
    """
    Report whether a file contains well-formed XML.

    usage: checkWellFormedness(filename)
    arguments:
    "filename": path of the file to run through the expat parser
    "error": when anything other than False, the failure reason is
             written to stderr
    returns: True if expat parses the whole file, False on any error
             (parse error, missing file, unreadable file, ...)
    """
    import xml.parsers.expat
    parser = xml.parsers.expat.ParserCreate()
    try:
        # Binary mode hands expat the raw bytes so it can honour the
        # document's own encoding declaration; the context manager
        # guarantees the handle is closed even when parsing fails.
        with open(filename, "rb") as handle:
            parser.ParseFile(handle)
    except Exception as e:
        if error is not False:
            sys.stderr.write("checkWellFormedness: %s: %s;\n" % (filename, e))
        return False
    return True
38
+
39
def getPronomSignature(type):
    """
    Query the PRONOM SOAP service for the latest signature file.

    usage: getPronomSignature(version|file)
    arguments:
    "version": returns latest signature file version number as int
    "file": returns latest signature XML file as string
    upon error: writes to stderr and returns False
    exception: when the request headers cannot be sent (PRONOM cannot
               be contacted) the process is terminated with sys.exit()
    """
    # Reject unknown request types before importing anything or touching
    # the network.
    if type == "version":
        operation = "getSignatureFileVersionV1"
    elif type == "file":
        operation = "getSignatureFileV1"
    else:
        sys.stderr.write("getPronomSignature(): unknown type: " + str(type) + "\n")
        return False
    try:
        import httplib
        import os
        import re
        import tempfile
        # Both requests share the same envelope; only the operation differs.
        soapStr = (
            '<?xml version="1.0" encoding="utf-8"?>'
            '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
            ' xmlns:xsd="http://www.w3.org/2001/XMLSchema"'
            ' xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
            '<soap:Body><%s xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body>'
            '</soap:Envelope>' % operation
        )
        # The SOAPAction value is sent including the surrounding quotes.
        soapAction = '"http://pronom.nationalarchives.gov.uk:%sIn"' % operation
        webservice = httplib.HTTP("apps.nationalarchives.gov.uk")
        webservice.putrequest("POST", "/pronom/service.asmx")
        # NOTE(review): the Host header names www.* while the connection
        # goes to apps.* -- kept exactly as before; confirm it is intended.
        webservice.putheader("Host", "www.nationalarchives.gov.uk")
        webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__pronomutils__['version']))
        webservice.putheader("Content-type", "text/xml; charset=\"UTF-8\"")
        webservice.putheader("Content-length", "%d" % len(soapStr))
        webservice.putheader("SOAPAction", soapAction)
        try:
            webservice.endheaders()
        except Exception as e:
            sys.stderr.write("getPronomSignature(): failed to contact PRONOM;\n%s\n" % (e))
            # NOTE(review): exits instead of returning False, unlike every
            # other failure path; kept for backward compatibility.
            sys.exit()
        webservice.send(soapStr)
        statuscode, statusmessage, header = webservice.getreply()
        if statuscode != 200:
            sys.stderr.write("getPronomSignature(): webservice error: '" + str(statuscode) + " " + statusmessage + "'\n")
            return False
        response = webservice.getfile().read()
        if type == "version":
            found = re.search(r"<Version>([0-9]{1,4})</Version>", response)
            if found is None:
                # Previously a missing match raised AttributeError and was
                # reported as an "unknown error".
                sys.stderr.write("getPronomSignature(): could not parse VERSION from SOAP response: " + type + "\n")
                return False
            return int(found.group(1))
        # type == "file"
        found = re.search(r"<SignatureFile>.*</SignatureFile>", response)
        sigtxt = found.group(0) if found else ''
        if not sigtxt:
            sys.stderr.write("getPronomSignature(): could not parse XML from SOAP response: " + type + "\n")
            return False
        document = """<?xml version="1.0" encoding="UTF-8"?>""" + "\n" + sigtxt
        # checkWellFormedness() works on a path, so the candidate document
        # is spooled to a private temporary file that is always removed.
        handle, tmpfile = tempfile.mkstemp(prefix="tmp_getPronomSignature", suffix=".xml")
        try:
            with os.fdopen(handle, 'wb') as tmp:
                tmp.write(document)
            wellFormed = checkWellFormedness(tmpfile)
        finally:
            os.unlink(tmpfile)
        if not wellFormed:
            sys.stderr.write("getPronomSignature(): signaturefile not well formed\n")
            return False
        return document
    except Exception as e:
        sys.stderr.write("getPronomSignature(): unknown error: " + str(e) + "\n")
        return False
@@ -0,0 +1,52 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # FIDO csv output to XML
5
+ # Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, september 2011
6
+ #
7
+ # Usage in combination with FIDO:
8
+ # Windows: python fido.py [ARGS] | python toxml.py > output.xml
9
+ # Linux: fido.py [ARGS] | toxml.py > output.xml
10
+ #
11
+ # Usage afterwards:
12
+ # Windows: type output.csv | toxml.py > output.xml
13
+ # Linux: cat output.csv | toxml.py > output.xml
14
+ #
15
+ # for difference in usage, see:
16
+ # http://bugs.python.org/issue9390
17
+ # http://support.microsoft.com/default.aspx?kbid=321788
18
+ #
19
+
20
+ import sys
21
+ import csv
22
+ import string
23
+
24
from xml.sax.saxutils import escape

# define FIDO version
fidoVersion = '1.0'
# define PRONOM signature version
signatureVersion = '56'

# Child elements of <file>, in output order, paired with the index of the
# CSV column each one is taken from.
_FILE_FIELDS = (
    ('filename', 6),
    ('status', 0),
    ('matchtype', 8),
    ('time', 1),
    ('puid', 2),
    ('mimetype', 7),
    ('formatname', 3),
    ('signaturename', 4),
    ('filesize', 5),
)
# Number of CSV columns a complete row provides.
_ROW_WIDTH = 9


def _file_element(row):
    """Render one CSV row as a <file> element with XML-escaped values."""
    # Short rows are padded so a truncated line yields empty elements
    # instead of an IndexError.
    padded = list(row) + [''] * (_ROW_WIDTH - len(row))
    parts = ["\n<file>"]
    for tag, column in _FILE_FIELDS:
        parts.append("<{0}>{1}</{0}>".format(tag, escape(padded[column])))
    parts.append("</file>")
    return "\n".join(parts)


def main(instream=None, outstream=None):
    """
    Convert FIDO csv output to XML.

    arguments:
    "instream": file-like object yielding csv lines (default: sys.stdin)
    "outstream": file-like object receiving the XML (default: sys.stdout)

    Values are XML-escaped so characters such as '&' and '<' in file names
    cannot break the document; blank csv lines are skipped.
    """
    if instream is None:
        instream = sys.stdin
    if outstream is None:
        outstream = sys.stdout
    outstream.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
<versions>
<fido_version>{0}</fido_version>
<signature_version>{1}</signature_version>
</versions>""".format(escape(fidoVersion), escape(signatureVersion)))
    for row in csv.reader(instream):
        if not row:
            # csv.reader yields [] for blank lines; nothing to report.
            continue
        outstream.write(_file_element(row))
    outstream.write("\n</fido_output>\n")


if __name__ == '__main__':
    main()
@@ -0,0 +1,171 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # FIDO SIGNATURE UPDATER
5
+ #
6
+ # Open Planets Foundation (http://www.openplanetsfoundation.org)
7
+ # See License.txt for license information.
8
+ # Download from: https://github.com/openplanets/fido/releases
9
+ # Author: Maurice de Rooij (NANETH), 2012
10
+ #
11
+ # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
12
+ # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
13
+ #
14
+ import sys, os, urllib, time, zipfile, shutil
15
+
16
+ from xml.etree import ElementTree as CET
17
+ from xml.etree import ElementTree as VET
18
+ from pronomutils import getPronomSignature, checkWellFormedness
19
+ import prepare
20
+
21
# Static configuration handed to main().
# File-name entries containing "{0}" are templates that main() fills with
# the PRONOM signature version number.
defaults = dict(
    version='1.2.2',
    conf_dir=os.path.join(os.path.dirname(__file__), 'conf'),
    tmp_dir='tmp',
    signatureFileName='DROID_SignatureFile-v{0}.xml',
    pronomZipFileName='pronom-xml-v{0}.zip',
    fidoSignatureVersion='format_extensions.xml',
    versionsFileName='versions.xml',
    # in secs, to prevent DoS of PRONOM server
    http_throttle=0.5,
    # container version is frozen and needs human attention before updating
    # NOTE(review): presumably this must match a container signature file
    # present in conf_dir -- verify before running the updater.
    containerVersion='container-signature-20130501.xml',
    # Template for versions.xml; main() fills {2}-{4} first and {0}/{1}
    # once the PRONOM version is known.
    versionXML="\n".join([
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<versions>',
        '\t<pronomVersion>{0}</pronomVersion>',
        '\t<pronomSignature>{1}</pronomSignature>',
        '\t<pronomContainerSignature>{2}</pronomContainerSignature>',
        '\t<fidoExtensionSignature>{3}</fidoExtensionSignature>',
        '\t<updateScript>{4}</updateScript>',
        '</versions>',
    ]),
)
33
+
34
def main(defaults):
    """
    Updates PRONOM signatures
    Interactive script, requires keyboard input

    arguments:
    "defaults": configuration dictionary; the keys read here are
                'version', 'conf_dir', 'tmp_dir', 'signatureFileName',
                'pronomZipFileName', 'fidoSignatureVersion',
                'versionsFileName', 'http_throttle', 'containerVersion'
                and 'versionXML'

    Steps: query the PRONOM version, download the signature file, download
    one XML record per PUID into a temporary folder, zip those records,
    rewrite versions.xml and finally call prepare.main().
    Always terminates through sys.exit(), also on success.
    """
    try:
        answers = ['y', 'yes']
        conf_dir = os.path.abspath(defaults['conf_dir'])
        # Fill the static placeholders now; {0} and {1} are re-inserted
        # literally and filled once the PRONOM version is known.
        versionXML = defaults['versionXML'].format("{0}", "{1}", defaults['containerVersion'], defaults['fidoSignatureVersion'], defaults['version'])
        print("FIDO signature updater v" + defaults['version'])
        print("Contacting PRONOM...")
        currentVersion = getPronomSignature("version")
        if currentVersion is False:
            print("Failed to obtain PRONOM signature file version number, please try again")
            sys.exit()
        print("Querying latest signaturefile version...")
        signatureFileName = defaults['signatureFileName'].format(currentVersion)
        signatureFile = os.path.join(conf_dir, signatureFileName)
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version " + str(currentVersion))
            ask = raw_input("Update anyway? (yes/no): ")
            if ask.lower() not in answers:
                sys.exit()
        print("Downloading signature file version " + str(currentVersion) + "...")
        currentFile = getPronomSignature("file")
        if currentFile is False:
            print("Failed to obtain PRONOM signature file, please try again")
            # sys.exit() rather than the site-provided exit(), which is
            # not guaranteed to exist.
            sys.exit()
        with open(signatureFile, 'wb') as sigfile:
            sigfile.write(currentFile)
        print("Writing {0}...".format(signatureFileName))
        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        # Formats without a PUID attribute cannot be downloaded; skip them
        # instead of failing later on None.split().
        puids = [
            node.get("PUID")
            for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat")
            if node.get("PUID")
        ]
        numberPuids = len(puids)
        print("Found " + str(numberPuids) + " PRONOM PUID's")
        print("Downloading signatures can take a while")
        ask = raw_input("Continue and download signatures? (yes/no): ")
        if ask.lower() not in answers:
            print("Aborting update...")
            sys.exit()
        resume_download = False
        tmpdir = os.path.join(conf_dir, defaults['tmp_dir'])
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download: " + tmpdir)
            ask = raw_input("Resume download (yes) or start over (no)?: ")
            if ask.lower() in answers:
                print("Resuming download...")
                resume_download = True
        else:
            print("Creating temporary folder for download: " + tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # Handled just below by falling back to conf_dir.
                pass
        # Remember whether we really own a dedicated folder: only then is
        # it safe to delete it afterwards.
        dedicated_tmpdir = os.path.isdir(tmpdir)
        if not dedicated_tmpdir:
            tmpdir = conf_dir
            print("Failed to create temporary folder for PUID's, using " + tmpdir)
        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        for puid in puids:
            puidType, puidNum = puid.split("/")
            puidFileName = "puid." + puidType + "." + puidNum + ".xml"
            filename = os.path.join(tmpdir, puidFileName)
            if resume_download and os.path.isfile(filename) and checkWellFormedness(filename):
                # Already fetched during a previous, interrupted run.
                numfiles += 1
                continue
            puidUrl = "http://www.nationalarchives.gov.uk/pronom/" + puid + ".xml"
            try:
                filehandle = urllib.urlopen(puidUrl)
            except Exception as e:
                print("Failed to download signaturefile: " + puidUrl)
                print("Error: " + str(e))
                print("Please restart and resume download")
                sys.exit()
            try:
                with open(filename, 'wb') as puidfile:
                    shutil.copyfileobj(filehandle, puidfile)
            finally:
                filehandle.close()
            if not checkWellFormedness(filename):
                # Discard broken downloads so they never reach the zip.
                os.unlink(filename)
                continue
            numfiles += 1
            percent = int(float(numfiles) / one_percent)
            sys.stdout.write("\r" + str(percent) + "%")
            sys.stdout.flush()
            time.sleep(defaults['http_throttle'])
        sys.stdout.write("\r100%\n")
        try:
            import zlib  # only probed: ZIP_DEFLATED needs zlib at write time
            compression = zipfile.ZIP_DEFLATED
        except ImportError:
            compression = zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        print("Creating PRONOM zip, adding files with compression mode '" + modes[compression] + "'")
        zipName = os.path.join(conf_dir, defaults['pronomZipFileName'].format(currentVersion))
        zf = zipfile.ZipFile(zipName, mode='w')
        try:
            for puid in puids:
                puidType, puidNum = puid.split("/")
                puidFileName = "puid." + puidType + "." + puidNum + ".xml"
                # Read from the folder the files were actually downloaded
                # to; it differs from conf_dir/tmp_dir when the fallback
                # above was taken.
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    os.unlink(filename)
        finally:
            zf.close()
        if dedicated_tmpdir:
            print("Deleting temporary folder and files...")
            # Best effort, as before. Never done for the conf_dir fallback:
            # that would wipe the configuration folder itself.
            shutil.rmtree(tmpdir, ignore_errors=True)
        # update versions.xml
        versionsFile = os.path.join(conf_dir, defaults['versionsFileName'])
        print("Updating {0}...".format(defaults['versionsFileName']))
        with open(versionsFile, 'wb') as xmlversionsfile:
            xmlversionsfile.write(versionXML.format(str(currentVersion), "formats-v" + str(currentVersion) + ".xml"))
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        # there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        prepare.main()
        print("FIDO signatures successfully updated")
        sys.exit()
    except KeyboardInterrupt:
        print("\nAborting update")
        sys.exit()


if __name__ == '__main__':
    main(defaults)
metadata ADDED
@@ -0,0 +1,342 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: libis-format
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.1
5
+ platform: ruby
6
+ authors:
7
+ - Kris Dekeyser
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: simplecov
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.9'
69
+ - !ruby/object:Gem::Dependency
70
+ name: libis-tools
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.9'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.9'
83
+ - !ruby/object:Gem::Dependency
84
+ name: os
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 0.9.6
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 0.9.6
97
+ description: Collection of tools and classes that help to identify formats of binary
98
+ files and create derivative copies (e.g. PDF from Word).
99
+ email:
100
+ - kris.dekeyser@libis.be
101
+ executables:
102
+ - droid
103
+ - fido
104
+ extensions: []
105
+ extra_rdoc_files: []
106
+ files:
107
+ - ".coveralls.yml"
108
+ - ".gitignore"
109
+ - ".travis.yml"
110
+ - Gemfile
111
+ - LICENSE.txt
112
+ - README.md
113
+ - Rakefile
114
+ - bin/droid
115
+ - bin/fido
116
+ - data/ISOcoated.icc
117
+ - data/PDFA_def.ps
118
+ - data/ead.xsd
119
+ - data/lias_formats.xml
120
+ - data/types.yml
121
+ - lib/libis-format.rb
122
+ - lib/libis/format.rb
123
+ - lib/libis/format/converter.rb
124
+ - lib/libis/format/converter/base.rb
125
+ - lib/libis/format/converter/chain.rb
126
+ - lib/libis/format/converter/repository.rb
127
+ - lib/libis/format/droid.rb
128
+ - lib/libis/format/fido.rb
129
+ - lib/libis/format/identifier.rb
130
+ - lib/libis/format/type_database.rb
131
+ - lib/libis/format/version.rb
132
+ - libis-format.gemspec
133
+ - spec/data/Cevennes2.bmp
134
+ - spec/data/Cevennes2.jp2
135
+ - spec/data/Cevennes2.ppm
136
+ - spec/data/test-ead.xml
137
+ - spec/data/test-jpg.tif
138
+ - spec/data/test-lzw.tif
139
+ - spec/data/test.bmp
140
+ - spec/data/test.doc
141
+ - spec/data/test.docx
142
+ - spec/data/test.gif
143
+ - spec/data/test.ods
144
+ - spec/data/test.odt
145
+ - spec/data/test.pdf
146
+ - spec/data/test.png
147
+ - spec/data/test.ps
148
+ - spec/data/test.psd
149
+ - spec/data/test.rtf
150
+ - spec/data/test.tif
151
+ - spec/data/test.txt
152
+ - spec/data/test.xcf
153
+ - spec/data/test.xls
154
+ - spec/data/test.xlsx
155
+ - spec/data/test.xml
156
+ - spec/identifier_spec.rb
157
+ - spec/spec_helper.rb
158
+ - spec/test_types.yml
159
+ - spec/type_database_spec.rb
160
+ - tools/droid/DROID_SignatureFile_V82.xml
161
+ - tools/droid/container-signature-20150307.xml
162
+ - tools/droid/droid-command-line-6.1.5.jar
163
+ - tools/droid/droid.bat
164
+ - tools/droid/droid.sh
165
+ - tools/droid/lib/XmlSchema-1.4.7.jar
166
+ - tools/droid/lib/activation-1.1.jar
167
+ - tools/droid/lib/antlr-2.7.7.jar
168
+ - tools/droid/lib/antlr-3.2.jar
169
+ - tools/droid/lib/antlr-runtime-3.2.jar
170
+ - tools/droid/lib/aopalliance-1.0.jar
171
+ - tools/droid/lib/asm-2.2.3.jar
172
+ - tools/droid/lib/aspectjrt-1.7.2.jar
173
+ - tools/droid/lib/aspectjweaver-1.7.2.jar
174
+ - tools/droid/lib/bcmail-jdk14-138.jar
175
+ - tools/droid/lib/bcprov-jdk14-138.jar
176
+ - tools/droid/lib/beansbinding-1.2.1.jar
177
+ - tools/droid/lib/byteseek-1.1.1.jar
178
+ - tools/droid/lib/cglib-nodep-2.2.2.jar
179
+ - tools/droid/lib/classmate-1.0.0.jar
180
+ - tools/droid/lib/commons-cli-1.2.jar
181
+ - tools/droid/lib/commons-codec-1.4.jar
182
+ - tools/droid/lib/commons-collections-3.2.1.jar
183
+ - tools/droid/lib/commons-compress-1.4.1.jar
184
+ - tools/droid/lib/commons-configuration-1.8.jar
185
+ - tools/droid/lib/commons-dbcp-1.4.jar
186
+ - tools/droid/lib/commons-httpclient-3.1.jar
187
+ - tools/droid/lib/commons-io-2.4.jar
188
+ - tools/droid/lib/commons-lang-2.6.jar
189
+ - tools/droid/lib/commons-logging-1.1.1.jar
190
+ - tools/droid/lib/commons-pool-1.5.4.jar
191
+ - tools/droid/lib/cxf-api-2.2.12.jar
192
+ - tools/droid/lib/cxf-common-schemas-2.2.12.jar
193
+ - tools/droid/lib/cxf-common-utilities-2.2.12.jar
194
+ - tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar
195
+ - tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar
196
+ - tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar
197
+ - tools/droid/lib/cxf-rt-core-2.2.12.jar
198
+ - tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar
199
+ - tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar
200
+ - tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar
201
+ - tools/droid/lib/cxf-rt-transports-http-2.2.12.jar
202
+ - tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar
203
+ - tools/droid/lib/cxf-tools-common-2.2.12.jar
204
+ - tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar
205
+ - tools/droid/lib/derby-10.10.2.0.jar
206
+ - tools/droid/lib/dom4j-1.6.1.jar
207
+ - tools/droid/lib/droid-container-6.1.5.jar
208
+ - tools/droid/lib/droid-core-6.1.5.jar
209
+ - tools/droid/lib/droid-core-interfaces-6.1.5.jar
210
+ - tools/droid/lib/droid-export-6.1.5.jar
211
+ - tools/droid/lib/droid-export-interfaces-6.1.5.jar
212
+ - tools/droid/lib/droid-help-6.1.5.jar
213
+ - tools/droid/lib/droid-report-6.1.5.jar
214
+ - tools/droid/lib/droid-report-interfaces-6.1.5.jar
215
+ - tools/droid/lib/droid-results-6.1.5.jar
216
+ - tools/droid/lib/ejb3-persistence-1.0.2.GA.jar
217
+ - tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar
218
+ - tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar
219
+ - tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar
220
+ - tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar
221
+ - tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar
222
+ - tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar
223
+ - tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar
224
+ - tools/droid/lib/hibernate-core-4.3.5.Final.jar
225
+ - tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar
226
+ - tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar
227
+ - tools/droid/lib/hibernate-validator-5.1.0.Final.jar
228
+ - tools/droid/lib/itext-2.0.8.jar
229
+ - tools/droid/lib/jandex-1.1.0.Final.jar
230
+ - tools/droid/lib/javahelp-2.0.05.jar
231
+ - tools/droid/lib/javassist-3.18.1-GA.jar
232
+ - tools/droid/lib/jaxb-api-2.1.jar
233
+ - tools/droid/lib/jaxb-impl-2.1.13.jar
234
+ - tools/droid/lib/jboss-logging-3.1.3.GA.jar
235
+ - tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar
236
+ - tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar
237
+ - tools/droid/lib/joda-time-1.6.2.jar
238
+ - tools/droid/lib/jra-1.0-alpha-4.jar
239
+ - tools/droid/lib/jta-1.1.jar
240
+ - tools/droid/lib/log4j-1.2.13.jar
241
+ - tools/droid/lib/neethi-2.0.4.jar
242
+ - tools/droid/lib/opencsv-2.3.jar
243
+ - tools/droid/lib/org-netbeans-swing-outline-7.2.jar
244
+ - tools/droid/lib/org-openide-util-7.2.jar
245
+ - tools/droid/lib/org-openide-util-lookup-7.2.jar
246
+ - tools/droid/lib/poi-3.7.jar
247
+ - tools/droid/lib/saaj-api-1.3.jar
248
+ - tools/droid/lib/saaj-impl-1.3.2.jar
249
+ - tools/droid/lib/slf4j-api-1.4.2.jar
250
+ - tools/droid/lib/slf4j-log4j12-1.4.2.jar
251
+ - tools/droid/lib/spring-aop-4.0.3.RELEASE.jar
252
+ - tools/droid/lib/spring-beans-4.0.3.RELEASE.jar
253
+ - tools/droid/lib/spring-context-4.0.3.RELEASE.jar
254
+ - tools/droid/lib/spring-core-4.0.3.RELEASE.jar
255
+ - tools/droid/lib/spring-expression-4.0.3.RELEASE.jar
256
+ - tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar
257
+ - tools/droid/lib/spring-orm-4.0.3.RELEASE.jar
258
+ - tools/droid/lib/spring-tx-4.0.3.RELEASE.jar
259
+ - tools/droid/lib/spring-web-2.5.6.jar
260
+ - tools/droid/lib/stax-api-1.0-2.jar
261
+ - tools/droid/lib/stringtemplate-3.2.jar
262
+ - tools/droid/lib/truezip-6.8.4.jar
263
+ - tools/droid/lib/validation-api-1.1.0.Final.jar
264
+ - tools/droid/lib/wsdl4j-1.6.2.jar
265
+ - tools/droid/lib/wstx-asl-3.2.9.jar
266
+ - tools/droid/lib/xercesImpl-2.9.1.jar
267
+ - tools/droid/lib/xml-apis-1.3.04.jar
268
+ - tools/droid/lib/xml-resolver-1.2.jar
269
+ - tools/droid/lib/xz-1.0.jar
270
+ - tools/fido/__init__.py
271
+ - tools/fido/argparselocal.py
272
+ - tools/fido/argparselocal.pyc
273
+ - tools/fido/conf/DROID_SignatureFile-v81.xml
274
+ - tools/fido/conf/container-signature-20150307.xml
275
+ - tools/fido/conf/dc.xsd
276
+ - tools/fido/conf/dcmitype.xsd
277
+ - tools/fido/conf/dcterms.xsd
278
+ - tools/fido/conf/fido-formats.xsd
279
+ - tools/fido/conf/format_extension_template.xml
280
+ - tools/fido/conf/format_extensions.xml
281
+ - tools/fido/conf/formats-v81.xml
282
+ - tools/fido/conf/pronom-xml-v81.zip
283
+ - tools/fido/conf/versions.xml
284
+ - tools/fido/fido.bat
285
+ - tools/fido/fido.py
286
+ - tools/fido/fido.sh
287
+ - tools/fido/prepare.py
288
+ - tools/fido/pronomutils.py
289
+ - tools/fido/toxml.py
290
+ - tools/fido/update_signatures.py
291
+ homepage: ''
292
+ licenses:
293
+ - MIT
294
+ metadata: {}
295
+ post_install_message:
296
+ rdoc_options: []
297
+ require_paths:
298
+ - lib
299
+ required_ruby_version: !ruby/object:Gem::Requirement
300
+ requirements:
301
+ - - ">="
302
+ - !ruby/object:Gem::Version
303
+ version: '0'
304
+ required_rubygems_version: !ruby/object:Gem::Requirement
305
+ requirements:
306
+ - - ">="
307
+ - !ruby/object:Gem::Version
308
+ version: '0'
309
+ requirements: []
310
+ rubyforge_project:
311
+ rubygems_version: 2.4.6
312
+ signing_key:
313
+ specification_version: 4
314
+ summary: LIBIS File format format services.
315
+ test_files:
316
+ - spec/data/Cevennes2.bmp
317
+ - spec/data/Cevennes2.jp2
318
+ - spec/data/Cevennes2.ppm
319
+ - spec/data/test-ead.xml
320
+ - spec/data/test-jpg.tif
321
+ - spec/data/test-lzw.tif
322
+ - spec/data/test.bmp
323
+ - spec/data/test.doc
324
+ - spec/data/test.docx
325
+ - spec/data/test.gif
326
+ - spec/data/test.ods
327
+ - spec/data/test.odt
328
+ - spec/data/test.pdf
329
+ - spec/data/test.png
330
+ - spec/data/test.ps
331
+ - spec/data/test.psd
332
+ - spec/data/test.rtf
333
+ - spec/data/test.tif
334
+ - spec/data/test.txt
335
+ - spec/data/test.xcf
336
+ - spec/data/test.xls
337
+ - spec/data/test.xlsx
338
+ - spec/data/test.xml
339
+ - spec/identifier_spec.rb
340
+ - spec/spec_helper.rb
341
+ - spec/test_types.yml
342
+ - spec/type_database_spec.rb