libis-format 0.9.32 → 0.9.33

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. checksums.yaml +4 -4
  2. data/data/types.yml +30 -16
  3. data/lib/libis/format/config.rb +7 -18
  4. data/lib/libis/format/converter/image_converter.rb +6 -0
  5. data/lib/libis/format/droid.rb +82 -25
  6. data/lib/libis/format/extension_identification.rb +55 -0
  7. data/lib/libis/format/fido.rb +57 -72
  8. data/lib/libis/format/file_tool.rb +76 -0
  9. data/lib/libis/format/identification_tool.rb +174 -0
  10. data/lib/libis/format/identifier.rb +129 -117
  11. data/lib/libis/format/type_database.rb +36 -5
  12. data/lib/libis/format/version.rb +1 -1
  13. data/lib/libis/format.rb +3 -0
  14. data/libis-format.gemspec +2 -1
  15. data/spec/converter_spec.rb +6 -4
  16. data/spec/identifier_spec.rb +125 -34
  17. metadata +21 -126
  18. data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
  19. data/tools/droid/container-signature-20170330.xml +0 -3584
  20. data/tools/droid/droid-command-line-6.3.jar +0 -0
  21. data/tools/droid/droid.bat +0 -152
  22. data/tools/droid/droid.sh +0 -152
  23. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  24. data/tools/droid/lib/activation-1.1.jar +0 -0
  25. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  26. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  27. data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
  28. data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
  29. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  30. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  31. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  32. data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
  33. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  34. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  35. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  36. data/tools/droid/lib/commons-codec-1.10.jar +0 -0
  37. data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
  38. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  39. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  40. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  41. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  42. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  43. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  44. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  45. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  46. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  47. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  48. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  49. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  50. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  51. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  52. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  53. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  54. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  55. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  56. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  57. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  58. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  59. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  60. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  61. data/tools/droid/lib/droid-container-6.3.jar +0 -0
  62. data/tools/droid/lib/droid-core-6.3.jar +0 -0
  63. data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
  64. data/tools/droid/lib/droid-export-6.3.jar +0 -0
  65. data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
  66. data/tools/droid/lib/droid-help-6.3.jar +0 -0
  67. data/tools/droid/lib/droid-report-6.3.jar +0 -0
  68. data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
  69. data/tools/droid/lib/droid-results-6.3.jar +0 -0
  70. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  71. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  72. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  73. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  74. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  75. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  76. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  77. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  78. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  79. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  80. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  81. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  82. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  83. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  84. data/tools/droid/lib/jta-1.1.jar +0 -0
  85. data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
  86. data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
  87. data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
  88. data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
  89. data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
  90. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  91. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  92. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  93. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  94. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  95. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  96. data/tools/droid/lib/poi-3.13.jar +0 -0
  97. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  98. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  99. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  100. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  101. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  102. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  103. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  104. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  105. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  106. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  107. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  108. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  109. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  110. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  111. data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
  112. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  113. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  114. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  115. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  116. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  117. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  118. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  119. data/tools/droid/lib/xz-1.0.jar +0 -0
  120. data/tools/fido/__init__.py +0 -50
  121. data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
  122. data/tools/fido/conf/container-signature-20170330.xml +0 -3584
  123. data/tools/fido/conf/dc.xsd +0 -119
  124. data/tools/fido/conf/dcmitype.xsd +0 -53
  125. data/tools/fido/conf/dcterms.xsd +0 -383
  126. data/tools/fido/conf/fido-formats.xsd +0 -173
  127. data/tools/fido/conf/format_extension_template.xml +0 -105
  128. data/tools/fido/conf/format_extensions.xml +0 -484
  129. data/tools/fido/conf/formats-v90.xml +0 -48877
  130. data/tools/fido/conf/pronom-xml-v90.zip +0 -0
  131. data/tools/fido/conf/versions.xml +0 -8
  132. data/tools/fido/fido.bat +0 -4
  133. data/tools/fido/fido.py +0 -884
  134. data/tools/fido/fido.sh +0 -5
  135. data/tools/fido/package.py +0 -96
  136. data/tools/fido/prepare.py +0 -645
  137. data/tools/fido/pronomutils.py +0 -200
  138. data/tools/fido/toxml.py +0 -60
  139. data/tools/fido/update_signatures.py +0 -183
@@ -1,200 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- """
4
- PRONOM UTILS.
5
-
6
- PYTHON FUNCTION TO QUERY PRONOM VERSION
7
- AND DOWNLOAD SIGNATUREFILE
8
- USES PRONOM SOAP SERVICE
9
-
10
- Open Planets Foundation (http://www.openplanetsfoundation.org)
11
- See License.txt for license information.
12
- Download from: http://github.com/openplanets/fido/downloads
13
- Author: Maurice de Rooij (OPF/NANETH), 2012
14
-
15
- PRONOM UTILS is a library used by FIDO.
16
- FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
17
- PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
18
- """
19
-
20
- from __future__ import absolute_import
21
-
22
- import os
23
- import re
24
- import sys
25
- from xml.etree import ElementTree as ET
26
- from xml.etree.ElementTree import parse, ParseError
27
- from xml.parsers.expat import ExpatError, ParserCreate
28
-
29
- import six
30
- from six.moves import http_client
31
-
32
- from . import __version__, CONFIG_DIR
33
-
34
-
def check_well_formedness(filename, error=False):
    """
    Check if a given file contains well-formed XML.

    :param filename: file from which the XML is read.
    :param error: whether or not print to `stderr` upon error.
    :returns: `True` when expat parses the whole file, `False` when it
        raises `ExpatError`. Errors opening the file are not caught.
    """
    parser = ParserCreate()
    try:
        # Binary mode lets expat honour the encoding declared in the document;
        # on Python 3 `ParseFile` refuses streams whose `read()` returns text.
        # The `with` block closes the handle even when parsing fails.
        with open(filename, "rb") as stream:
            parser.ParseFile(stream)
    except ExpatError as e:
        if error is not False:
            sys.stderr.write("check_well_formedness: %s: %s;\n" % (filename, e))
        return False
    return True
51
-
52
-
def get_pronom_signature(type_):
    """
    Get PRONOM signature.

    Sends a SOAP request to `www.nationalarchives.gov.uk/pronom/service.asmx`.

    :param type_: "version" to query the latest signature file version
        number, or "file" to download the latest signature file.
    :returns: the version number as `int` for "version"; the signature XML
        document as a byte string (with an XML declaration prepended) for
        "file". Upon any error, writes to `stderr` and returns `False`.
    """
    # SOAP operation name per request type; the envelope and the SOAPAction
    # header are both derived from it.
    operations = {
        "version": "getSignatureFileVersionV1",
        "file": "getSignatureFileV1",
    }
    try:
        operation = operations.get(type_)
        if operation is None:
            sys.stderr.write("get_pronom_signature(): unknown type: {0}".format(type_))
            return False

        payload = (
            '<?xml version="1.0" encoding="utf-8"?>'
            '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
            ' xmlns:xsd="http://www.w3.org/2001/XMLSchema"'
            ' xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
            '<soap:Body><{0} xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body>'
            '</soap:Envelope>'
        ).format(operation).encode("utf-8")
        # Host and Content-Length are filled in by HTTPConnection.request().
        headers = {
            "User-Agent": "PRONOM UTILS v{0} (OPF)".format(__version__),
            "Content-type": "text/xml; charset=\"UTF-8\"",
            "SOAPAction": '"http://pronom.nationalarchives.gov.uk:{0}In"'.format(operation),
        }

        # HTTPConnection exists on both Python 2 and 3; the legacy
        # `http_client.HTTP` class is Python 2 only.
        connection = http_client.HTTPConnection("www.nationalarchives.gov.uk")
        try:
            connection.request("POST", "/pronom/service.asmx", payload, headers)
            response = connection.getresponse()
            statuscode = response.status
            statusmessage = response.reason
            body = response.read()
        except (EnvironmentError, http_client.HTTPException) as e:
            # Report and return False instead of terminating the caller's process.
            sys.stderr.write("get_pronom_signature(): failed to contact PRONOM;\n%s\n" % (e))
            return False
        finally:
            connection.close()

        if statuscode != 200:
            sys.stderr.write("get_pronom_signature(): webservice error: '" + str(statuscode) + " " + statusmessage + "'\n")
            return False

        # The response body is bytes, so the patterns are bytes as well.
        if type_ == "version":
            found = re.search(b"<Version>([0-9]{1,4})</Version>", body)
            if found is None:
                sys.stderr.write("get_pronom_signature(): could not parse VERSION from SOAP response: " + type_)
                return False
            return int(found.group(1))

        found = re.search(b"<SignatureFile>.*</SignatureFile>", body)
        if found is None:
            sys.stderr.write("get_pronom_signature(): could not parse XML from SOAP response: " + type_)
            return False
        document = b'<?xml version="1.0" encoding="UTF-8"?>' + b"\n" + found.group(0)
        # Check well-formedness in memory rather than through a temporary
        # file in the current working directory.
        try:
            ParserCreate().Parse(document, True)
        except ExpatError:
            sys.stderr.write("get_pronom_signature(): signaturefile not well formed")
            return False
        return document
    except Exception as e:
        sys.stderr.write("get_pronom_signature(): unknown error: " + str(e))
        return False
126
-
127
-
class LocalPronomVersions(object):
    """
    Parse local PRONOM signature versions XML file.

    This is how the XML document should look like:

        <?xml version="1.0" encoding="UTF-8"?>
        <versions>
            <pronomVersion>84</pronomVersion>
            <pronomSignature>formats-v84.xml</pronomSignature>
            <pronomContainerSignature>container-signature-20160121.xml</pronomContainerSignature>
            <fidoExtensionSignature>format_extensions.xml</fidoExtensionSignature>
            <updateScript>1.2.2</updateScript>
        </versions>

    Each key of `PROPS_MAPPING` is exposed as an attribute that reads and
    writes the text of the matching child element of the root.
    """

    # Python attribute name -> XML element name under the root element.
    PROPS_MAPPING = {
        'pronom_version': 'pronomVersion',
        'pronom_signature': 'pronomSignature',
        'pronom_container_signature': 'pronomContainerSignature',
        'fido_extension_signature': 'fidoExtensionSignature',
        'update_script': 'updateScript',
    }

    ROOT_ELEMENT = 'versions'

    def __init__(self, versions_file):
        """
        Instantiate class based on the file indicated in `versions_file`.

        When the file cannot be read or parsed, start from an empty
        `<versions>` document instead of failing.
        """
        self.versions_file = versions_file
        self.conf_dir = os.path.abspath(os.path.dirname(versions_file))
        try:
            self.tree = parse(versions_file)
            self.root = self.tree.getroot()
        except (ParseError, IOError):
            self.root = ET.Element(self.ROOT_ELEMENT)
            self.tree = ET.ElementTree(self.root)

    def __getattr__(self, name):
        """
        Extract the element's text content.

        Only invoked when normal attribute lookup fails.

        :raises AttributeError: for names outside `PROPS_MAPPING`, and for
            mapped names whose element is absent from the document.
        """
        if name not in self.PROPS_MAPPING:
            # Raising (rather than returning None) keeps hasattr()/getattr()
            # defaults and protocol probes working as expected.
            raise AttributeError(
                "{0!r} object has no attribute {1!r}".format(type(self).__name__, name))
        element = self.root.find(self.PROPS_MAPPING[name])
        if element is None:
            raise AttributeError('Field {} has not been defined!'.format(name))
        return element.text

    def __setattr__(self, name, value):
        """Update the element's text content, creating the element if needed."""
        if name not in self.PROPS_MAPPING:
            object.__setattr__(self, name, value)
            return
        element = self.root.find(self.PROPS_MAPPING[name])
        if element is None:
            element = ET.SubElement(self.root, self.PROPS_MAPPING[name])
        element.text = value

    def get_zip_file(self):
        """Obtain location to the PRONOM XML Zip file based on the current PRONOM version."""
        return os.path.join(self.conf_dir, 'pronom-xml-v{}.zip'.format(self.pronom_version))

    def get_signature_file(self):
        """Obtain location to the current PRONOM signature file."""
        return os.path.join(self.conf_dir, self.pronom_signature)

    def write(self):
        """
        Update versions.xml.

        :raises ValueError: when any field of `PROPS_MAPPING` has no element
            in the document; nothing is written in that case.
        """
        # Check that all the fields are defined
        for key, value in self.PROPS_MAPPING.items():
            if self.root.find(value) is None:
                raise ValueError('Field {} has not been defined!'.format(key))
        self.tree.write(self.versions_file, xml_declaration=True, method='xml', encoding='utf-8')
196
-
197
-
def get_local_pronom_versions(config_dir=CONFIG_DIR):
    """Build a LocalPronomVersions from the `versions.xml` file found in `config_dir`."""
    versions_path = os.path.join(config_dir, 'versions.xml')
    return LocalPronomVersions(versions_path)
data/tools/fido/toxml.py DELETED
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- """
5
- FIDO CSV output to XML.
6
-
7
- Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, September 2011
8
-
9
- Usage in combination with FIDO:
10
- - Windows: python fido.py [ARGS] | python toxml.py > output.xml
11
- - Linux: fido.py [ARGS] | toxml.py > output.xml
12
-
13
- Usage afterwards:
14
- - Windows: type output.csv | toxml.py > output.xml
15
- - Linux: cat output.csv | toxml.py > output.xml
16
-
17
- For difference in usage, see:
18
- - http://bugs.python.org/issue9390
19
- - http://support.microsoft.com/default.aspx?kbid=321788
20
- """
21
-
22
- from __future__ import absolute_import
23
-
24
- import csv
25
- import sys
26
-
27
- from . import __version__
28
- from .pronomutils import get_local_pronom_versions
29
-
30
-
def main():
    """
    Generate XML as read from CSV and send it to the standard output stream.

    Reads CSV rows from standard input and writes one `<file>` element per
    row, preceded by a `<versions>` header. Field values are XML-escaped so
    that characters such as `&` or `<` in a filename cannot produce
    malformed output. Blank CSV rows are skipped.
    """
    # Local import keeps this fix self-contained within the function.
    from xml.sax.saxutils import escape

    # Element name -> index of the CSV column that supplies its value.
    columns = (
        ('filename', 6),
        ('status', 0),
        ('matchtype', 8),
        ('time', 1),
        ('puid', 2),
        ('mimetype', 7),
        ('formatname', 3),
        ('signaturename', 4),
        ('filesize', 5),
    )

    write = sys.stdout.write
    write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
    <versions>
        <fido_version>{0}</fido_version>
        <signature_version>{1}</signature_version>
    </versions>""".format(__version__, get_local_pronom_versions().pronom_version))

    for row in csv.reader(sys.stdin):
        if not row:
            # csv.reader yields an empty list for a blank input line.
            continue
        write("\n    <file>")
        for tag, index in columns:
            write("\n        <{0}>{1}</{0}>".format(tag, escape(row[index])))
        write("\n    </file>")

    write("\n</fido_output>\n")


if __name__ == '__main__':
    main()
@@ -1,183 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- """
5
- FIDO SIGNATURE UPDATER.
6
-
7
- Open Planets Foundation (http://www.openplanetsfoundation.org)
8
- See License.txt for license information.
9
- Download from: https://github.com/openplanets/fido/releases
10
- Author: Maurice de Rooij (NANETH), 2012
11
-
12
- FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
13
- PRONOM is available from http://www.nationalarchives.gov.uk/pronom/.
14
- """
15
-
16
- from __future__ import print_function
17
-
18
- from argparse import ArgumentParser
19
- import os
20
- from shutil import rmtree
21
- import sys
22
- import time
23
- from xml.etree import ElementTree as CET
24
- import zipfile
25
-
26
- from six.moves.urllib.request import urlopen
27
-
28
- from . import __version__, CONFIG_DIR, query_yes_no
29
- from .prepare import run as prepare_pronom_to_fido
30
- from .pronomutils import check_well_formedness, get_local_pronom_versions, get_pronom_signature
31
-
32
-
# File names used by the updater; "{0}" is filled in with the PRONOM version number.
defaults = dict(
    signatureFileName='DROID_SignatureFile-v{0}.xml',
    pronomZipFileName='pronom-xml-v{0}.zip',
    fidoSignatureVersion='format_extensions.xml',
    # container version is frozen and needs human attention before updating
    containerVersion='container-signature-20160121.xml',
)

# Runtime settings; main() offers a command-line flag for each of them.
options = dict(
    http_throttle=0.5,  # in secs, to prevent DoS of PRONOM server
    tmp_dir=os.path.join(CONFIG_DIR, 'tmp'),
    deleteTempDirectory=True,
)
45
-
46
-
def run(defaults=defaults):
    """
    Update PRONOM signatures.

    Interactive script, requires keyboard input.

    Steps: query the latest signature version, download the signature file,
    download one XML file per PUID into a temporary folder, pack those into
    the PRONOM zip, update versions.xml and convert the formats to FIDO
    signatures.

    :param defaults: mapping of settings. Keys missing from it
        (`tmp_dir`, `http_throttle`, `deleteTempDirectory`) fall back to the
        module-level `options`, so calling `run()` without arguments works.
    """
    settings = dict(options)
    settings.update(defaults)
    tmpdir = settings['tmp_dir']
    delete_tmp = settings['deleteTempDirectory']

    print("FIDO signature updater v{}".format(__version__))

    try:
        print("Contacting PRONOM...")
        current_version = get_pronom_signature("version")
        if not current_version:
            sys.exit('Failed to obtain PRONOM signature file version number, please try again.')

        print("Querying latest signaturefile version...")
        signature_name = settings['signatureFileName'].format(current_version)
        signature_file = os.path.join(CONFIG_DIR, signature_name)
        if os.path.isfile(signature_file):
            print("You already have the latest PRONOM signature file, version", current_version)
            if not query_yes_no("Update anyway?"):
                sys.exit('Aborting update...')

        print("Downloading signature file version {}...".format(current_version))
        current_file = get_pronom_signature("file")
        if not current_file:
            sys.exit('Failed to obtain PRONOM signature file, please try again.')
        # The file is opened in binary mode, so text must be encoded first.
        if not isinstance(current_file, bytes):
            current_file = current_file.encode('utf-8')
        print("Writing {0}...".format(signature_name))
        with open(signature_file, 'wb') as file_:
            file_.write(current_file)

        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signature_file)
        format_tag = "{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat"
        # Entries without a PUID attribute cannot be downloaded; leave them out.
        puids = [node.get("PUID") for node in tree.iter(format_tag) if node.get("PUID")]
        number_puids = len(puids)
        print("Found {} PRONOM PUID's".format(number_puids))

        print("Downloading signatures can take a while")
        if not query_yes_no("Continue and download signatures?"):
            sys.exit('Aborting update...')

        # Must be bound on every path: the download loop below reads it.
        resume_download = False
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download:", tmpdir)
            resume_download = query_yes_no('Do you want to resume download (yes) or start over (no)?')
            if resume_download:
                print("Resuming download...")
        else:
            print("Creating temporary folder for download:", tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError as e:
                # Without the folder every download below would fail anyway.
                sys.exit("Failed to create temporary folder for PUID's {}: {}".format(tmpdir, e))

        print("Downloading signatures, one moment please...")
        numfiles = 0
        for puid in puids:
            puid_type, puid_num = puid.split("/")
            filename = os.path.join(tmpdir, "puid." + puid_type + "." + puid_num + ".xml")
            # Cheap flag first so existing files are only parsed when resuming.
            if resume_download and os.path.isfile(filename) and check_well_formedness(filename):
                numfiles += 1
                continue
            puid_url = "http://www.nationalarchives.gov.uk/pronom/{}.xml".format(puid)
            try:
                filehandle = urlopen(puid_url)
            except Exception as e:
                print("Failed to download signature file:", puid_url)
                print("Error:", str(e))
                sys.exit('Please restart and resume download.')
            try:
                with open(filename, 'wb') as file_:
                    file_.write(filehandle.read())
            finally:
                filehandle.close()
            if not check_well_formedness(filename):
                print("Skipping signature file that is not well formed:", puid_url)
                os.unlink(filename)
                continue
            numfiles += 1
            percent = numfiles * 100 // number_puids
            print(r"{}/{} files [{}%]".format(numfiles, number_puids, percent))
            time.sleep(settings['http_throttle'])
        print("100%")

        print("Creating PRONOM zip...")
        compression = zipfile.ZIP_DEFLATED if 'zlib' in sys.modules else zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        zip_path = os.path.join(CONFIG_DIR, settings['pronomZipFileName'].format(current_version))
        print("Adding files with compression mode", modes[compression])
        # The context manager closes the archive even if adding a file fails.
        with zipfile.ZipFile(zip_path, mode='w') as zf:
            for puid in puids:
                puid_type, puid_num = puid.split("/")
                puid_file_name = "puid.{}.{}.xml".format(puid_type, puid_num)
                filename = os.path.join(tmpdir, puid_file_name)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puid_file_name, compress_type=compression)
                    if delete_tmp:
                        os.unlink(filename)

        if delete_tmp:
            print("Deleting temporary folder and files...")
            rmtree(tmpdir, ignore_errors=True)

        print('Updating versions.xml...')
        versions = get_local_pronom_versions()
        versions.pronom_version = str(current_version)
        versions.pronom_signature = "formats-v" + str(current_version) + ".xml"
        versions.pronom_container_signature = settings['containerVersion']
        versions.fido_extension_signature = settings['fidoSignatureVersion']
        versions.update_script = __version__
        versions.write()

        # TODO: there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        prepare_pronom_to_fido()
        print("FIDO signatures successfully updated")

    except KeyboardInterrupt:
        sys.exit('Aborting update...')
167
-
168
-
def main():
    """Parse the command-line flags, overlay them on `defaults` and start the update."""
    cli = ArgumentParser(description='Download and convert the latest PRONOM signatures')
    cli.add_argument(
        '-tmpdir',
        dest='tmp_dir',
        default=options['tmp_dir'],
        help='Location to store temporary files',
    )
    cli.add_argument(
        '-keep_tmp',
        dest='deleteTempDirectory',
        action='store_false',
        default=options['deleteTempDirectory'],
        help='Do not delete temporary files after completion',
    )
    cli.add_argument(
        '-http_throttle',
        dest='http_throttle',
        type=float,
        default=options['http_throttle'],
        help='Time (in seconds) to wait between downloads',
    )
    parsed = vars(cli.parse_args())

    # Command-line values take precedence over the module-level defaults.
    run(dict(defaults, **parsed))


if __name__ == '__main__':
    main()