libis-format 0.9.30 → 0.9.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/lib/libis/format/converter/image_converter.rb +2 -2
  3. data/lib/libis/format/office_to_pdf.rb +1 -1
  4. data/lib/libis/format/version.rb +1 -1
  5. data/spec/converter_spec.rb +43 -27
  6. data/spec/data/test-options.png +0 -0
  7. data/spec/data/test.pdf.tif +0 -0
  8. data/tools/droid/{DROID_SignatureFile_V82.xml → DROID_SignatureFile_V90.xml} +8202 -701
  9. data/tools/droid/{container-signature-20150307.xml → container-signature-20170330.xml} +3584 -2235
  10. data/tools/droid/droid-command-line-6.3.jar +0 -0
  11. data/tools/droid/droid.bat +152 -154
  12. data/tools/droid/droid.sh +30 -16
  13. data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
  14. data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
  15. data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
  16. data/tools/droid/lib/commons-codec-1.10.jar +0 -0
  17. data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
  18. data/tools/droid/lib/droid-container-6.3.jar +0 -0
  19. data/tools/droid/lib/droid-core-6.3.jar +0 -0
  20. data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
  21. data/tools/droid/lib/droid-export-6.3.jar +0 -0
  22. data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
  23. data/tools/droid/lib/{droid-help-6.1.5.jar → droid-help-6.3.jar} +0 -0
  24. data/tools/droid/lib/droid-report-6.3.jar +0 -0
  25. data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
  26. data/tools/droid/lib/droid-results-6.3.jar +0 -0
  27. data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
  28. data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
  29. data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
  30. data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
  31. data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
  32. data/tools/droid/lib/poi-3.13.jar +0 -0
  33. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  34. data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
  35. data/tools/fido/__init__.py +50 -0
  36. data/tools/fido/conf/DROID_SignatureFile-v90.xml +2 -0
  37. data/tools/fido/conf/{container-signature-20150307.xml → container-signature-20170330.xml} +1487 -141
  38. data/tools/fido/conf/format_extensions.xml +0 -14
  39. data/tools/fido/conf/{formats-v81.xml → formats-v90.xml} +11409 -887
  40. data/tools/fido/conf/{pronom-xml-v81.zip → pronom-xml-v90.zip} +0 -0
  41. data/tools/fido/conf/versions.xml +6 -6
  42. data/tools/fido/fido.py +437 -407
  43. data/tools/fido/package.py +96 -0
  44. data/tools/fido/prepare.py +217 -188
  45. data/tools/fido/pronomutils.py +143 -58
  46. data/tools/fido/toxml.py +54 -46
  47. data/tools/fido/update_signatures.py +139 -127
  48. metadata +34 -40
  49. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  50. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  51. data/tools/droid/lib/antlr-3.2.jar +0 -0
  52. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  53. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  54. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  55. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  56. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  57. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  58. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  59. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  60. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  61. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  62. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  63. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  64. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  65. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  66. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  67. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  68. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  69. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  70. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  71. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  72. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  73. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  74. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  75. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  76. data/tools/droid/lib/poi-3.7.jar +0 -0
  77. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  78. data/tools/fido/argparselocal.py +0 -2355
  79. data/tools/fido/conf/DROID_SignatureFile-v81.xml +0 -2
@@ -1,115 +1,200 @@
1
1
  # -*- coding: utf-8 -*-
2
- #
3
- # PRONOM UTILS
4
- #
5
- # PYTHON FUNCTION TO QUERY PRONOM VERSION
6
- # AND DOWNLOAD SIGNATUREFILE
7
- # USES PRONOM SOAP SERVICE
8
- #
9
- # Open Planets Foundation (http://www.openplanetsfoundation.org)
10
- # See License.txt for license information.
11
- # Download from: http://github.com/openplanets/fido/downloads
12
- # Author: Maurice de Rooij (OPF/NANETH), 2012
13
- #
14
- # PRONOM UTILS is a library used by FIDO
15
- # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
16
- # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
17
- #
2
+
3
+ """
4
+ PRONOM UTILS.
5
+
6
+ PYTHON FUNCTION TO QUERY PRONOM VERSION
7
+ AND DOWNLOAD SIGNATUREFILE
8
+ USES PRONOM SOAP SERVICE
9
+
10
+ Open Planets Foundation (http://www.openplanetsfoundation.org)
11
+ See License.txt for license information.
12
+ Download from: http://github.com/openplanets/fido/downloads
13
+ Author: Maurice de Rooij (OPF/NANETH), 2012
14
+
15
+ PRONOM UTILS is a library used by FIDO.
16
+ FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
17
+ PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
18
+ """
19
+
20
+ from __future__ import absolute_import
21
+
22
+ import os
23
+ import re
18
24
  import sys
19
- from xml.dom import minidom
20
- __pronomutils__ = {'version' : '1.0.1'}
25
+ from xml.etree import ElementTree as ET
26
+ from xml.etree.ElementTree import parse, ParseError
27
+ from xml.parsers.expat import ExpatError, ParserCreate
28
+
29
+ import six
30
+ from six.moves import http_client
31
+
32
+ from . import __version__, CONFIG_DIR
21
33
 
22
- def checkWellFormedness(filename,error=False):
34
+
35
+ def check_well_formedness(filename, error=False):
23
36
  """
24
- usage: checkWellFormedness(filename)
25
- arguments:
26
- "filename": returns true if filename is a valid XML file
27
- "error": whether or not print to stderr upon error
37
+ Check if a given file contains valid XML.
38
+
39
+ :param filename: file from which the XML is read.
40
+ :param error: whether or not to print to `stderr` upon error.
41
+ :returns: whether the file contains valid XML.
28
42
  """
29
- import xml.parsers.expat
30
- parser = xml.parsers.expat.ParserCreate()
43
+ parser = ParserCreate()
31
44
  try:
32
45
  parser.ParseFile(open(filename, "r"))
33
- except Exception, e:
46
+ except ExpatError as e:
34
47
  if error is not False:
35
- sys.stderr.write("checkWellFormedness: %s: %s;\n" % (filename, e))
48
+ sys.stderr.write("check_well_formedness: %s: %s;\n" % (filename, e))
36
49
  return False
37
50
  return True
38
51
 
39
- def getPronomSignature(type):
52
+
53
+ def get_pronom_signature(type_):
40
54
  """
41
- usage: getPronomSignature(version|file)
42
- arguments:
43
- "version": returns latest signature file version number as int
44
- "file": returns latest signature XML file as string
45
- upon error: writes to stderr and returns false
55
+ Get PRONOM signature.
56
+
57
+ Return latest signature file version number as int when `type_` equals
58
+ "version" or return latest signature XML file as string when `type_` equals
59
+ "file". Upon error, write to `stderr` and return `False`.
46
60
  """
47
61
  try:
48
- import httplib
49
- import re
50
- import os
51
62
  soapVersionContainer = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getSignatureFileVersionV1 xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>"""
52
63
  soapFileContainer = """<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><getSignatureFileV1 xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body></soap:Envelope>"""
53
64
  soapVersionHeader = """\"http://pronom.nationalarchives.gov.uk:getSignatureFileVersionV1In\""""
54
65
  soapFileHeader = """\"http://pronom.nationalarchives.gov.uk:getSignatureFileV1In\""""
55
- if type == "version":
66
+ if type_ == "version":
56
67
  soapAction = soapVersionHeader
57
68
  soapStr = soapVersionContainer
58
- elif type == "file":
69
+ elif type_ == "file":
59
70
  soapAction = soapFileHeader
60
71
  soapStr = soapFileContainer
61
72
  else:
62
- sys.stderr.write("getPronomSignature(): unknown type: "+type)
73
+ sys.stderr.write("get_pronom_signature(): unknown type: " + type_)
63
74
  return False
64
- webservice = httplib.HTTP("apps.nationalarchives.gov.uk")
75
+ webservice = http_client.HTTP("www.nationalarchives.gov.uk")
65
76
  webservice.putrequest("POST", "/pronom/service.asmx")
66
77
  webservice.putheader("Host", "www.nationalarchives.gov.uk")
67
- webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__pronomutils__['version']))
78
+ webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__version__))
68
79
  webservice.putheader("Content-type", "text/xml; charset=\"UTF-8\"")
69
80
  webservice.putheader("Content-length", "%d" % len(soapStr))
70
81
  webservice.putheader("SOAPAction", soapAction)
71
82
  try:
72
83
  webservice.endheaders()
73
- except Exception, e:
74
- sys.stderr.write("getPronomSignature(): failed to contact PRONOM;\n%s\n" % (e))
84
+ except Exception as e:
85
+ sys.stderr.write("get_pronom_signature(): failed to contact PRONOM;\n%s\n" % (e))
75
86
  sys.exit()
76
87
  webservice.send(soapStr)
77
88
  statuscode, statusmessage, header = webservice.getreply()
78
89
  if statuscode == 200:
79
90
  xml = webservice.getfile()
80
- if type == "version":
91
+ if type_ == "version":
81
92
  exp = re.compile("\<Version\>([0-9]{1,4})\<\/Version\>")
82
93
  sigxml = exp.search(xml.read())
83
94
  if len(sigxml.group(1)) > 0:
84
95
  return int(sigxml.group(1))
85
96
  else:
86
- sys.stderr.write("getPronomSignature(): could not parse VERSION from SOAP response: "+type)
97
+ sys.stderr.write("get_pronom_signature(): could not parse VERSION from SOAP response: " + type_)
87
98
  return False
88
- if type == "file":
99
+ if type_ == "file":
89
100
  exp = re.compile("\<SignatureFile\>.*\<\/SignatureFile\>")
90
101
  sigxml = exp.search(xml.read())
91
102
  sigtxt = sigxml.group(0) if sigxml else ''
92
103
  if len(sigtxt) > 0:
93
104
  tmpfile = "./tmp_getPronomSignature.xml"
94
- tmp = open(tmpfile,'wb')
95
- tmp.write("""<?xml version="1.0" encoding="UTF-8"?>"""+"\n")
96
- tmp.write(sigtxt)
97
- tmp.close()
98
- if not checkWellFormedness(tmpfile):
105
+ with open(tmpfile, 'wb') as file_:
106
+ file_.write("""<?xml version="1.0" encoding="UTF-8"?>""" + "\n")
107
+ file_.write(sigtxt)
108
+ if not check_well_formedness(tmpfile):
99
109
  os.unlink(tmpfile)
100
- sys.stderr.write("getPronomSignature(): signaturefile not well formed")
110
+ sys.stderr.write("get_pronom_signature(): signaturefile not well formed")
101
111
  return False
102
112
  else:
103
113
  os.unlink(tmpfile)
104
- return """<?xml version="1.0" encoding="UTF-8"?>"""+"\n"+sigtxt
114
+ return """<?xml version="1.0" encoding="UTF-8"?>""" + "\n" + sigtxt
105
115
  else:
106
- sys.stderr.write("getPronomSignature(): could not parse XML from SOAP response: "+type)
116
+ sys.stderr.write("get_pronom_signature(): could not parse XML from SOAP response: " + type_)
107
117
  return False
108
118
  else:
109
- sys.stderr.write("getPronomSignature(): webservice error: '"+str(statuscode)+" "+statusmessage+"'\n")
119
+ sys.stderr.write("get_pronom_signature(): webservice error: '" + str(statuscode) + " " + statusmessage + "'\n")
110
120
  return False
111
- print sys.stderr.write("getPronomSignature(): unexpected return")
121
+ sys.stderr.write("get_pronom_signature(): unexpected return")
112
122
  return False
113
- except Exception, e:
114
- print sys.stderr.write("getPronomSignature(): unknown error: "+str(e))
123
+ except Exception as e:
124
+ sys.stderr.write("get_pronom_signature(): unknown error: " + str(e))
115
125
  return False
126
+
127
+
128
+ class LocalPronomVersions(object):
129
+ """
130
+ Parse local PRONOM signature versions XML file.
131
+
132
+ This is what the XML document should look like:
133
+
134
+ <?xml version="1.0" encoding="UTF-8"?>
135
+ <versions>
136
+ <pronomVersion>84</pronomVersion>
137
+ <pronomSignature>formats-v84.xml</pronomSignature>
138
+ <pronomContainerSignature>container-signature-20160121.xml</pronomContainerSignature>
139
+ <fidoExtensionSignature>format_extensions.xml</fidoExtensionSignature>
140
+ <updateScript>1.2.2</updateScript>
141
+ </versions>
142
+ """
143
+
144
+ PROPS_MAPPING = {
145
+ 'pronom_version': 'pronomVersion',
146
+ 'pronom_signature': 'pronomSignature',
147
+ 'pronom_container_signature': 'pronomContainerSignature',
148
+ 'fido_extension_signature': 'fidoExtensionSignature',
149
+ 'update_script': 'updateScript',
150
+ }
151
+
152
+ ROOT_ELEMENT = 'versions'
153
+
154
+ def __init__(self, versions_file):
155
+ """Instantiate class based on the file indicated in `versions_file`."""
156
+ self.versions_file = versions_file
157
+ self.conf_dir = os.path.abspath(os.path.dirname(versions_file))
158
+ try:
159
+ self.tree = parse(versions_file)
160
+ self.root = self.tree.getroot()
161
+ except (ParseError, IOError):
162
+ self.root = ET.Element(self.ROOT_ELEMENT)
163
+ self.tree = ET.ElementTree(self.root)
164
+
165
+ def __getattr__(self, name):
166
+ """Extract the element's text content."""
167
+ if name in self.PROPS_MAPPING:
168
+ return self.root.find(self.PROPS_MAPPING[name]).text
169
+
170
+ def __setattr__(self, name, value):
171
+ """Update the element's text content."""
172
+ if name in self.PROPS_MAPPING:
173
+ try:
174
+ self.root.find(self.PROPS_MAPPING[name]).text = value
175
+ except AttributeError:
176
+ elem = ET.SubElement(self.root, self.PROPS_MAPPING[name])
177
+ elem.text = value
178
+ else:
179
+ object.__setattr__(self, name, value)
180
+
181
+ def get_zip_file(self):
182
+ """Obtain location to the PRONOM XML Zip file based on the current PRONOM version."""
183
+ return os.path.join(self.conf_dir, 'pronom-xml-v{}.zip'.format(self.pronom_version))
184
+
185
+ def get_signature_file(self):
186
+ """Obtain location to the current PRONOM signature file."""
187
+ return os.path.join(self.conf_dir, self.pronom_signature)
188
+
189
+ def write(self):
190
+ """Update versions.xml."""
191
+ # Check that all the fields are defined
192
+ for key, value in six.iteritems(self.PROPS_MAPPING):
193
+ if self.root.find(value) is None:
194
+ raise ValueError('Field {} has not been defined!'.format(key))
195
+ self.tree.write(self.versions_file, xml_declaration=True, method='xml', encoding='utf-8')
196
+
197
+
198
+ def get_local_pronom_versions(config_dir=CONFIG_DIR):
199
+ """Return an instance of LocalPronomVersions loaded with `conf/versions.xml`."""
200
+ return LocalPronomVersions(os.path.join(config_dir, 'versions.xml'))
data/tools/fido/toxml.py CHANGED
@@ -1,52 +1,60 @@
1
- #!python
1
+ #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
- #
4
- # FIDO csv output to XML
5
- # Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, september 2011
6
- #
7
- # Usage in combination with FIDO:
8
- # Windows: python fido.py [ARGS] | python toxml.py > output.xml
9
- # Linux: fido.py [ARGS] | toxml.py > output.xml
10
- #
11
- # Usage afterwards:
12
- # Windows: type output.csv | toxml.py > output.xml
13
- # Linux: cat output.csv | toxml.py > output.xml
14
- #
15
- # for difference in usage, see:
16
- # http://bugs.python.org/issue9390
17
- # http://support.microsoft.com/default.aspx?kbid=321788
18
- #
19
3
 
20
- import sys
4
+ """
5
+ FIDO CSV output to XML.
6
+
7
+ Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, September 2011
8
+
9
+ Usage in combination with FIDO:
10
+ - Windows: python fido.py [ARGS] | python toxml.py > output.xml
11
+ - Linux: fido.py [ARGS] | toxml.py > output.xml
12
+
13
+ Usage afterwards:
14
+ - Windows: type output.csv | toxml.py > output.xml
15
+ - Linux: cat output.csv | toxml.py > output.xml
16
+
17
+ For difference in usage, see:
18
+ - http://bugs.python.org/issue9390
19
+ - http://support.microsoft.com/default.aspx?kbid=321788
20
+ """
21
+
22
+ from __future__ import absolute_import
23
+
21
24
  import csv
22
- import string
25
+ import sys
26
+
27
+ from . import __version__
28
+ from .pronomutils import get_local_pronom_versions
23
29
 
24
- # define FIDO version
25
- fidoVersion = '1.0'
26
- # define PRONOM signature version
27
- signatureVersion = '56'
28
30
 
29
- sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
31
+ def main():
32
+ """Generate XML as read from CSV and send it to the standard output stream."""
33
+ sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
30
34
  <fido_output>
31
- <versions>
32
- <fido_version>{0}</fido_version>
33
- <signature_version>{1}</signature_version>
34
- </versions>""".format(fidoVersion,signatureVersion))
35
-
36
- reader = csv.reader(sys.stdin)
37
-
38
- for row in reader:
39
- sys.stdout.write("""
40
- <file>
41
- <filename>{0}</filename>
42
- <status>{1}</status>
43
- <matchtype>{2}</matchtype>
44
- <time>{3}</time>
45
- <puid>{4}</puid>
46
- <mimetype>{5}</mimetype>
47
- <formatname>{6}</formatname>
48
- <signaturename>{7}</signaturename>
49
- <filesize>{8}</filesize>
50
- </file>""".format(row[6],row[0],row[8],row[1],row[2],row[7],row[3],row[4],row[5]))
51
-
52
- sys.stdout.write("\n</fido_output>\n")
35
+ <versions>
36
+ <fido_version>{0}</fido_version>
37
+ <signature_version>{1}</signature_version>
38
+ </versions>""".format(__version__, get_local_pronom_versions().pronom_version))
39
+
40
+ reader = csv.reader(sys.stdin)
41
+
42
+ for row in reader:
43
+ sys.stdout.write("""
44
+ <file>
45
+ <filename>{0}</filename>
46
+ <status>{1}</status>
47
+ <matchtype>{2}</matchtype>
48
+ <time>{3}</time>
49
+ <puid>{4}</puid>
50
+ <mimetype>{5}</mimetype>
51
+ <formatname>{6}</formatname>
52
+ <signaturename>{7}</signaturename>
53
+ <filesize>{8}</filesize>
54
+ </file>""".format(row[6], row[0], row[8], row[1], row[2], row[7], row[3], row[4], row[5]))
55
+
56
+ sys.stdout.write("\n</fido_output>\n")
57
+
58
+
59
+ if __name__ == '__main__':
60
+ main()
@@ -1,171 +1,183 @@
1
- #!python
1
+ #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
- #
4
- # FIDO SIGNATURE UPDATER
5
- #
6
- # Open Planets Foundation (http://www.openplanetsfoundation.org)
7
- # See License.txt for license information.
8
- # Download from: https://github.com/openplanets/fido/releases
9
- # Author: Maurice de Rooij (NANETH), 2012
10
- #
11
- # FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions .
12
- # PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
13
- #
14
- import sys, os, urllib, time, zipfile, shutil
15
3
 
4
+ """
5
+ FIDO SIGNATURE UPDATER.
6
+
7
+ Open Planets Foundation (http://www.openplanetsfoundation.org)
8
+ See License.txt for license information.
9
+ Download from: https://github.com/openplanets/fido/releases
10
+ Author: Maurice de Rooij (NANETH), 2012
11
+
12
+ FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
13
+ PRONOM is available from http://www.nationalarchives.gov.uk/pronom/.
14
+ """
15
+
16
+ from __future__ import print_function
17
+
18
+ from argparse import ArgumentParser
19
+ import os
20
+ from shutil import rmtree
21
+ import sys
22
+ import time
16
23
  from xml.etree import ElementTree as CET
17
- from xml.etree import ElementTree as VET
18
- from pronomutils import getPronomSignature, checkWellFormedness
19
- import prepare
24
+ import zipfile
25
+
26
+ from six.moves.urllib.request import urlopen
27
+
28
+ from . import __version__, CONFIG_DIR, query_yes_no
29
+ from .prepare import run as prepare_pronom_to_fido
30
+ from .pronomutils import check_well_formedness, get_local_pronom_versions, get_pronom_signature
31
+
20
32
 
21
33
  defaults = {
22
- 'version': '1.2.2',
23
- 'conf_dir': os.path.join(os.path.dirname(__file__), 'conf'),
24
- 'tmp_dir': 'tmp',
25
- 'signatureFileName' : 'DROID_SignatureFile-v{0}.xml',
26
- 'pronomZipFileName' : 'pronom-xml-v{0}.zip',
27
- 'fidoSignatureVersion' : 'format_extensions.xml',
28
- 'versionsFileName' : 'versions.xml',
29
- 'http_throttle' : 0.5, # in secs, to prevent DoS of PRONOM server
30
- 'containerVersion' : 'container-signature-20130501.xml', # container version is frozen and needs human attention before updating
31
- 'versionXML' : """<?xml version="1.0" encoding="UTF-8"?>\n<versions>\n\t<pronomVersion>{0}</pronomVersion>\n\t<pronomSignature>{1}</pronomSignature>\n\t<pronomContainerSignature>{2}</pronomContainerSignature>\n\t<fidoExtensionSignature>{3}</fidoExtensionSignature>\n\t<updateScript>{4}</updateScript>\n</versions>"""
32
- }
33
-
34
- def main(defaults):
34
+ 'signatureFileName': 'DROID_SignatureFile-v{0}.xml',
35
+ 'pronomZipFileName': 'pronom-xml-v{0}.zip',
36
+ 'fidoSignatureVersion': 'format_extensions.xml',
37
+ 'containerVersion': 'container-signature-20160121.xml', # container version is frozen and needs human attention before updating,
38
+ }
39
+
40
+ options = {
41
+ 'http_throttle': 0.5, # in secs, to prevent DoS of PRONOM server
42
+ 'tmp_dir': os.path.join(CONFIG_DIR, 'tmp'),
43
+ 'deleteTempDirectory': True,
44
+ }
45
+
46
+
47
+ def run(defaults=defaults):
35
48
  """
36
- Updates PRONOM signatures
37
- Interactive script, requires keyboard input
49
+ Update PRONOM signatures.
50
+
51
+ Interactive script, requires keyboard input.
38
52
  """
53
+ print("FIDO signature updater v{}".format(__version__))
54
+
39
55
  try:
40
- resume_download = False
41
- answers = ['y','yes']
42
- versionXML = defaults['versionXML'].format("{0}","{1}",defaults['containerVersion'],defaults['fidoSignatureVersion'],defaults['version'])
43
- #print versionXML
44
- print "FIDO signature updater v"+defaults['version']
45
- print "Contacting PRONOM..."
46
- currentVersion = getPronomSignature("version")
47
- if currentVersion == False:
48
- print "Failed to obtain PRONOM signature file version number, please try again"
49
- sys.exit()
50
- print "Querying latest signaturefile version..."
51
- signatureFile = os.path.join(os.path.abspath(defaults['conf_dir']), defaults['signatureFileName'].format(currentVersion))
56
+ print("Contacting PRONOM...")
57
+ currentVersion = get_pronom_signature("version")
58
+ if not currentVersion:
59
+ sys.exit('Failed to obtain PRONOM signature file version number, please try again.')
60
+
61
+ print("Querying latest signaturefile version...")
62
+ signatureFile = os.path.join(CONFIG_DIR, defaults['signatureFileName'].format(currentVersion))
52
63
  if os.path.isfile(signatureFile):
53
- print "You already have the latest PRONOM signature file, version "+str(currentVersion)
54
- ask = raw_input("Update anyway? (yes/no): ")
55
- if ask.lower() not in answers:
56
- sys.exit()
57
- print "Downloading signature file version "+str(currentVersion)+"..."
58
- currentFile = getPronomSignature("file")
59
- if currentFile == False:
60
- print "Failed to obtain PRONOM signature file, please try again"
61
- exit()
62
- sigfile = open(signatureFile,'wb')
63
- sigfile.write(currentFile)
64
- sigfile.close()
65
- print "Writing {0}...".format(defaults['signatureFileName'].format(currentVersion))
66
- print "Extracting PRONOM PUID's from signature file..."
64
+ print("You already have the latest PRONOM signature file, version", currentVersion)
65
+ if not query_yes_no("Update anyway?"):
66
+ sys.exit('Aborting update...')
67
+
68
+ print("Downloading signature file version {}...".format(currentVersion))
69
+ currentFile = get_pronom_signature("file")
70
+ if not currentFile:
71
+ sys.exit('Failed to obtain PRONOM signature file, please try again.')
72
+ print("Writing {0}...".format(defaults['signatureFileName'].format(currentVersion)))
73
+ with open(signatureFile, 'wb') as file_:
74
+ file_.write(currentFile)
75
+
76
+ print("Extracting PRONOM PUID's from signature file...")
67
77
  tree = CET.parse(signatureFile)
68
78
  puids = []
69
79
  for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat"):
70
80
  puids.append(node.get("PUID"))
71
81
  numberPuids = len(puids)
72
- print "Found "+str(numberPuids)+" PRONOM PUID's"
73
- print "Downloading signatures can take a while"
74
- ask = raw_input("Continue and download signatures? (yes/no): ")
75
- if ask.lower() not in answers:
76
- print "Aborting update..."
77
- sys.exit()
78
- tmpdir = os.path.join(os.path.abspath(defaults['conf_dir']), defaults['tmp_dir'])
82
+ print("Found {} PRONOM PUID's".format(numberPuids))
83
+
84
+ print("Downloading signatures can take a while")
85
+ if not query_yes_no("Continue and download signatures?"):
86
+ sys.exit('Aborting update...')
87
+ tmpdir = defaults['tmp_dir']
79
88
  if os.path.isdir(tmpdir):
80
- print "Found previously created temporary folder for download:", tmpdir
81
- ask = raw_input("Resume download (yes) or start over (no)?: ")
82
- if ask.lower() in answers:
83
- print "Resuming download..."
84
- resume_download = True
85
- else:
86
- resume_download = False
89
+ print("Found previously created temporary folder for download:", tmpdir)
90
+ resume_download = query_yes_no('Do you want to resume download (yes) or start over (no)?')
91
+ if resume_download:
92
+ print("Resuming download...")
87
93
  else:
88
- print "Creating temporary folder for download:", tmpdir
94
+ print("Creating temporary folder for download:", tmpdir)
89
95
  try:
90
96
  os.mkdir(tmpdir)
91
97
  except:
92
98
  pass
93
99
  if not os.path.isdir(tmpdir):
94
- tmpdir = os.path.join(os.path.abspath(defaults['conf_dir']))
95
- print "Failed to create temporary folder for PUID's, using", tmpdir
96
- print "Downloading signatures, one moment please..."
100
+ print("Failed to create temporary folder for PUID's, using", tmpdir)
101
+
102
+ print("Downloading signatures, one moment please...")
97
103
  one_percent = (float(numberPuids) / 100)
98
104
  numfiles = 0
99
105
  for puid in puids:
100
106
  puidType, puidNum = puid.split("/")
101
- puidFileName = "puid."+puidType+"."+puidNum+".xml"
107
+ puidFileName = "puid." + puidType + "." + puidNum + ".xml"
102
108
  filename = os.path.join(tmpdir, puidFileName)
103
- if os.path.isfile(filename) and checkWellFormedness(filename) and resume_download is not False:
109
+ if os.path.isfile(filename) and check_well_formedness(filename) and resume_download:
104
110
  numfiles += 1
105
111
  continue
106
- puidUrl = "http://www.nationalarchives.gov.uk/pronom/"+puid+".xml"
112
+ puid_url = "http://www.nationalarchives.gov.uk/pronom/{}.xml".format(puid)
107
113
  try:
108
- filehandle = urllib.urlopen(puidUrl)
109
- except Exception, e:
110
- print "Failed to download signaturefile:", puidUrl
111
- print "Error:", str(e)
112
- print "Please restart and resume download"
113
- sys.exit()
114
- puidfile = open(filename,'wb')
115
- for lines in filehandle.readlines():
116
- puidfile.write(lines)
117
- puidfile.close()
114
+ filehandle = urlopen(puid_url)
115
+ except Exception as e:
116
+ print("Failed to download signature file:", puid_url)
117
+ print("Error:", str(e))
118
+ sys.exit('Please restart and resume download.')
119
+ with open(filename, 'wb') as file_:
120
+ for lines in filehandle.readlines():
121
+ file_.write(lines)
118
122
  filehandle.close()
119
- if not checkWellFormedness(filename):
123
+ if not check_well_formedness(filename):
120
124
  os.unlink(filename)
121
125
  continue
122
126
  numfiles += 1
123
127
  percent = int(float(numfiles) / one_percent)
124
- print "\r",
125
- print str(percent)+"%",
128
+ print(r"{}/{} files [{}%]".format(numfiles, numberPuids, percent))
126
129
  time.sleep(defaults['http_throttle'])
127
- print "100%"
128
- try:
129
- import zlib
130
- compression = zipfile.ZIP_DEFLATED
131
- except:
132
- compression = zipfile.ZIP_STORED
130
+ print("100%")
131
+
132
+ print("Creating PRONOM zip...")
133
+ compression = zipfile.ZIP_DEFLATED if 'zlib' in sys.modules else zipfile.ZIP_STORED
133
134
  modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
134
- print "Creating PRONOM zip,",
135
- zf = zipfile.ZipFile(os.path.join(os.path.abspath(defaults['conf_dir']), defaults['pronomZipFileName'].format(currentVersion)), mode='w')
136
- print "adding files with compression mode '"+modes[compression]+"'"
135
+ zf = zipfile.ZipFile(os.path.join(CONFIG_DIR, defaults['pronomZipFileName'].format(currentVersion)), mode='w')
136
+ print("Adding files with compression mode", modes[compression])
137
137
  for puid in puids:
138
138
  puidType, puidNum = puid.split("/")
139
- puidFileName = "puid."+puidType+"."+puidNum+".xml"
140
- filename = os.path.join(os.path.abspath(defaults['conf_dir']), defaults['tmp_dir'], puidFileName)
139
+ puidFileName = "puid.{}.{}.xml".format(puidType, puidNum)
140
+ filename = os.path.join(tmpdir, puidFileName)
141
141
  if os.path.isfile(filename):
142
142
  zf.write(filename, arcname=puidFileName, compress_type=compression)
143
- os.unlink(filename)
143
+ if defaults['deleteTempDirectory']:
144
+ os.unlink(filename)
144
145
  zf.close()
145
- print "Deleting temporary folder and files..."
146
- try:
147
- for root, dirs, files in os.walk(tmpdir, topdown=False):
148
- for name in files:
149
- os.remove(os.path.join(root, name))
150
- for name in dirs:
151
- os.rmdir(os.path.join(root, name))
152
- os.rmdir(tmpdir)
153
- except:
154
- pass
155
- # update versions.xml
156
- versionsFile = os.path.join(os.path.abspath(defaults['conf_dir']), defaults['versionsFileName'])
157
- print "Updating {0}...".format(defaults['versionsFileName'])
158
- xmlversionsfile = open(versionsFile,'wb')
159
- xmlversionsfile.write(versionXML.format(str(currentVersion),"formats-v"+str(currentVersion)+".xml"))
160
- xmlversionsfile.close()
161
- print "Preparing to convert PRONOM formats to FIDO signatures..."
162
- # there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
163
- prepare.main()
164
- print "FIDO signatures successfully updated"
165
- sys.exit()
146
+
147
+ if defaults['deleteTempDirectory']:
148
+ print("Deleting temporary folder and files...")
149
+ rmtree(tmpdir, ignore_errors=True)
150
+
151
+ print('Updating versions.xml...')
152
+ versions = get_local_pronom_versions()
153
+ versions.pronom_version = str(currentVersion)
154
+ versions.pronom_signature = "formats-v" + str(currentVersion) + ".xml"
155
+ versions.pronom_container_signature = defaults['containerVersion']
156
+ versions.fido_extension_signature = defaults['fidoSignatureVersion']
157
+ versions.update_script = __version__
158
+ versions.write()
159
+
160
+ # TODO: there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
161
+ print("Preparing to convert PRONOM formats to FIDO signatures...")
162
+ prepare_pronom_to_fido()
163
+ print("FIDO signatures successfully updated")
164
+
166
165
  except KeyboardInterrupt:
167
- print "\nAborting update"
168
- sys.exit()
166
+ sys.exit('Aborting update...')
167
+
168
+
169
+ def main():
170
+ """Main CLI entrypoint."""
171
+ parser = ArgumentParser(description='Download and convert the latest PRONOM signatures')
172
+ parser.add_argument('-tmpdir', default=options['tmp_dir'], help='Location to store temporary files', dest='tmp_dir')
173
+ parser.add_argument('-keep_tmp', default=options['deleteTempDirectory'], help='Do not delete temporary files after completion', dest='deleteTempDirectory', action='store_false')
174
+ parser.add_argument('-http_throttle', default=options['http_throttle'], help='Time (in seconds) to wait between downloads', type=float, dest='http_throttle')
175
+ args = parser.parse_args()
176
+ opts = defaults.copy()
177
+ opts.update(vars(args))
178
+
179
+ run(opts)
180
+
169
181
 
170
182
  if __name__ == '__main__':
171
- main(defaults)
183
+ main()