libis-format 0.9.32 → 0.9.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +4 -4
  2. data/data/types.yml +30 -16
  3. data/lib/libis/format/config.rb +7 -18
  4. data/lib/libis/format/converter/image_converter.rb +6 -0
  5. data/lib/libis/format/droid.rb +82 -25
  6. data/lib/libis/format/extension_identification.rb +55 -0
  7. data/lib/libis/format/fido.rb +57 -72
  8. data/lib/libis/format/file_tool.rb +76 -0
  9. data/lib/libis/format/identification_tool.rb +174 -0
  10. data/lib/libis/format/identifier.rb +129 -117
  11. data/lib/libis/format/type_database.rb +36 -5
  12. data/lib/libis/format/version.rb +1 -1
  13. data/lib/libis/format.rb +3 -0
  14. data/libis-format.gemspec +2 -1
  15. data/spec/converter_spec.rb +6 -4
  16. data/spec/identifier_spec.rb +125 -34
  17. metadata +21 -126
  18. data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
  19. data/tools/droid/container-signature-20170330.xml +0 -3584
  20. data/tools/droid/droid-command-line-6.3.jar +0 -0
  21. data/tools/droid/droid.bat +0 -152
  22. data/tools/droid/droid.sh +0 -152
  23. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  24. data/tools/droid/lib/activation-1.1.jar +0 -0
  25. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  26. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  27. data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
  28. data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
  29. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  30. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  31. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  32. data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
  33. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  34. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  35. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  36. data/tools/droid/lib/commons-codec-1.10.jar +0 -0
  37. data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
  38. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  39. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  40. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  41. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  42. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  43. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  44. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  45. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  46. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  47. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  48. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  49. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  50. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  51. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  52. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  53. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  54. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  55. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  56. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  57. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  58. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  59. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  60. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  61. data/tools/droid/lib/droid-container-6.3.jar +0 -0
  62. data/tools/droid/lib/droid-core-6.3.jar +0 -0
  63. data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
  64. data/tools/droid/lib/droid-export-6.3.jar +0 -0
  65. data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
  66. data/tools/droid/lib/droid-help-6.3.jar +0 -0
  67. data/tools/droid/lib/droid-report-6.3.jar +0 -0
  68. data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
  69. data/tools/droid/lib/droid-results-6.3.jar +0 -0
  70. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  71. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  72. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  73. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  74. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  75. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  76. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  77. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  78. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  79. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  80. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  81. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  82. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  83. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  84. data/tools/droid/lib/jta-1.1.jar +0 -0
  85. data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
  86. data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
  87. data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
  88. data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
  89. data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
  90. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  91. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  92. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  93. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  94. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  95. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  96. data/tools/droid/lib/poi-3.13.jar +0 -0
  97. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  98. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  99. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  100. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  101. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  102. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  103. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  104. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  105. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  106. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  107. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  108. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  109. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  110. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  111. data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
  112. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  113. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  114. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  115. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  116. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  117. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  118. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  119. data/tools/droid/lib/xz-1.0.jar +0 -0
  120. data/tools/fido/__init__.py +0 -50
  121. data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
  122. data/tools/fido/conf/container-signature-20170330.xml +0 -3584
  123. data/tools/fido/conf/dc.xsd +0 -119
  124. data/tools/fido/conf/dcmitype.xsd +0 -53
  125. data/tools/fido/conf/dcterms.xsd +0 -383
  126. data/tools/fido/conf/fido-formats.xsd +0 -173
  127. data/tools/fido/conf/format_extension_template.xml +0 -105
  128. data/tools/fido/conf/format_extensions.xml +0 -484
  129. data/tools/fido/conf/formats-v90.xml +0 -48877
  130. data/tools/fido/conf/pronom-xml-v90.zip +0 -0
  131. data/tools/fido/conf/versions.xml +0 -8
  132. data/tools/fido/fido.bat +0 -4
  133. data/tools/fido/fido.py +0 -884
  134. data/tools/fido/fido.sh +0 -5
  135. data/tools/fido/package.py +0 -96
  136. data/tools/fido/prepare.py +0 -645
  137. data/tools/fido/pronomutils.py +0 -200
  138. data/tools/fido/toxml.py +0 -60
  139. data/tools/fido/update_signatures.py +0 -183
@@ -1,200 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- """
4
- PRONOM UTILS.
5
-
6
- PYTHON FUNCTION TO QUERY PRONOM VERSION
7
- AND DOWNLOAD SIGNATUREFILE
8
- USES PRONOM SOAP SERVICE
9
-
10
- Open Planets Foundation (http://www.openplanetsfoundation.org)
11
- See License.txt for license information.
12
- Download from: http://github.com/openplanets/fido/downloads
13
- Author: Maurice de Rooij (OPF/NANETH), 2012
14
-
15
- PRONOM UTILS is a library used by FIDO.
16
- FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
17
- PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
18
- """
19
-
20
- from __future__ import absolute_import
21
-
22
- import os
23
- import re
24
- import sys
25
- from xml.etree import ElementTree as ET
26
- from xml.etree.ElementTree import parse, ParseError
27
- from xml.parsers.expat import ExpatError, ParserCreate
28
-
29
- import six
30
- from six.moves import http_client
31
-
32
- from . import __version__, CONFIG_DIR
33
-
34
-
35
def check_well_formedness(filename, error=False):
    """
    Check if a given file contains well-formed XML.

    :param filename: file from which the XML is read.
    :param error: whether or not print to `stderr` upon error.
    :returns: whether the file contains well-formed XML.
    """
    parser = ParserCreate()
    try:
        # Binary mode hands raw bytes to expat, which is what ParseFile
        # requires on Python 3 and lets the parser apply the encoding
        # declared in the document itself. The context manager closes the
        # handle even when parsing fails.
        with open(filename, "rb") as xml_file:
            parser.ParseFile(xml_file)
    except ExpatError as e:
        if error is not False:
            sys.stderr.write("check_well_formedness: %s: %s;\n" % (filename, e))
        return False
    return True
51
-
52
-
53
def get_pronom_signature(type_):
    """
    Get PRONOM signature.

    Return latest signature file version number as int when `type_` equals
    "version" or return latest signature XML file as string when `type_` equals
    "file". Upon error, write to `stderr` and return `False`.

    :param type_: either "version" or "file".
    :returns: an int (version), a string (signature XML document) or `False`.
    """
    # SOAP operation exposed by the PRONOM service for each request type.
    operations = {
        "version": "getSignatureFileVersionV1",
        "file": "getSignatureFileV1",
    }
    try:
        if type_ not in operations:
            sys.stderr.write("get_pronom_signature(): unknown type: " + str(type_))
            return False
        operation = operations[type_]
        soap_body = (
            '<?xml version="1.0" encoding="utf-8"?>'
            '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
            ' xmlns:xsd="http://www.w3.org/2001/XMLSchema"'
            ' xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
            '<soap:Body><{0} xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body>'
            '</soap:Envelope>'
        ).format(operation).encode("utf-8")
        headers = {
            "User-Agent": "PRONOM UTILS v{0} (OPF)".format(__version__),
            "Content-type": "text/xml; charset=\"UTF-8\"",
            # The service expects the action wrapped in literal double quotes.
            "SOAPAction": '"http://pronom.nationalarchives.gov.uk:{0}In"'.format(operation),
        }

        # HTTPConnection exists on both Python 2 and 3 (the legacy `HTTP`
        # class is Python 2 only); it also fills in Host and Content-Length.
        connection = http_client.HTTPConnection("www.nationalarchives.gov.uk")
        try:
            try:
                connection.request("POST", "/pronom/service.asmx", soap_body, headers)
                response = connection.getresponse()
            except Exception as e:
                # Report and return False as documented, rather than ending
                # the whole process with a success exit status.
                sys.stderr.write("get_pronom_signature(): failed to contact PRONOM;\n%s\n" % (e))
                return False
            statuscode = response.status
            statusmessage = response.reason
            payload = response.read()
        finally:
            connection.close()

        if statuscode != 200:
            sys.stderr.write("get_pronom_signature(): webservice error: '" + str(statuscode) + " " + statusmessage + "'\n")
            return False

        # Work on the native string type: bytes are decoded on Python 3 and
        # left untouched on Python 2, where `str` already is the byte string.
        if not isinstance(payload, str):
            payload = payload.decode("utf-8")

        if type_ == "version":
            found = re.search(r"\<Version\>([0-9]{1,4})\<\/Version\>", payload)
            if found is None:
                sys.stderr.write("get_pronom_signature(): could not parse VERSION from SOAP response: " + type_)
                return False
            return int(found.group(1))

        # type_ == "file"
        found = re.search(r"\<SignatureFile\>.*\<\/SignatureFile\>", payload)
        if found is None or not found.group(0):
            sys.stderr.write("get_pronom_signature(): could not parse XML from SOAP response: " + type_)
            return False
        document = """<?xml version="1.0" encoding="UTF-8"?>""" + "\n" + found.group(0)
        # Validate in memory; no scratch file in the current directory needed.
        try:
            ParserCreate().Parse(document, True)
        except ExpatError:
            sys.stderr.write("get_pronom_signature(): signaturefile not well formed")
            return False
        return document
    except Exception as e:
        sys.stderr.write("get_pronom_signature(): unknown error: " + str(e))
        return False
126
-
127
-
128
class LocalPronomVersions(object):
    """
    Parse local PRONOM signature versions XML file.

    This is how the XML document should look like:

        <?xml version="1.0" encoding="UTF-8"?>
        <versions>
            <pronomVersion>84</pronomVersion>
            <pronomSignature>formats-v84.xml</pronomSignature>
            <pronomContainerSignature>container-signature-20160121.xml</pronomContainerSignature>
            <fidoExtensionSignature>format_extensions.xml</fidoExtensionSignature>
            <updateScript>1.2.2</updateScript>
        </versions>

    Each key of `PROPS_MAPPING` is exposed as an attribute whose value is
    the text of the corresponding child element of the root.
    """

    # Python attribute name -> XML element name under the root element.
    PROPS_MAPPING = {
        'pronom_version': 'pronomVersion',
        'pronom_signature': 'pronomSignature',
        'pronom_container_signature': 'pronomContainerSignature',
        'fido_extension_signature': 'fidoExtensionSignature',
        'update_script': 'updateScript',
    }

    ROOT_ELEMENT = 'versions'

    def __init__(self, versions_file):
        """Instantiate class based on the file indicated in `versions_file`."""
        self.versions_file = versions_file
        self.conf_dir = os.path.abspath(os.path.dirname(versions_file))
        try:
            self.tree = parse(versions_file)
            self.root = self.tree.getroot()
        except (ParseError, IOError):
            # Missing or unparsable file: start from an empty document so
            # the fields can be filled in and written out later.
            self.root = ET.Element(self.ROOT_ELEMENT)
            self.tree = ET.ElementTree(self.root)

    def __getattr__(self, name):
        """
        Extract the element's text content.

        Only called for names not found through normal attribute lookup.

        :raises AttributeError: if `name` is not a mapped property, or the
            mapped element is absent from the document.
        """
        if name in self.PROPS_MAPPING:
            element = self.root.find(self.PROPS_MAPPING[name])
            if element is None:
                raise AttributeError(
                    'Field {} has not been defined!'.format(name))
            return element.text
        # Falling through would return None for any unknown name, making
        # hasattr() always true and hiding typos.
        raise AttributeError(
            '{!r} object has no attribute {!r}'.format(type(self).__name__, name))

    def __setattr__(self, name, value):
        """Update the element's text content, creating the element if needed."""
        if name not in self.PROPS_MAPPING:
            object.__setattr__(self, name, value)
            return
        tag = self.PROPS_MAPPING[name]
        element = self.root.find(tag)
        if element is None:
            element = ET.SubElement(self.root, tag)
        element.text = value

    def get_zip_file(self):
        """Obtain location to the PRONOM XML Zip file based on the current PRONOM version."""
        return os.path.join(self.conf_dir, 'pronom-xml-v{}.zip'.format(self.pronom_version))

    def get_signature_file(self):
        """Obtain location to the current PRONOM signature file."""
        return os.path.join(self.conf_dir, self.pronom_signature)

    def write(self):
        """
        Update versions.xml.

        :raises ValueError: if any mapped field is missing from the document.
        """
        # Check that all the fields are defined
        for key, value in self.PROPS_MAPPING.items():
            if self.root.find(value) is None:
                raise ValueError('Field {} has not been defined!'.format(key))
        self.tree.write(self.versions_file, xml_declaration=True, method='xml', encoding='utf-8')
196
-
197
-
198
def get_local_pronom_versions(config_dir=CONFIG_DIR):
    """
    Build a LocalPronomVersions for the `versions.xml` found in `config_dir`.

    :param config_dir: directory holding `versions.xml`; defaults to the
        package-level `CONFIG_DIR`.
    :returns: a `LocalPronomVersions` instance bound to that file.
    """
    versions_path = os.path.join(config_dir, 'versions.xml')
    return LocalPronomVersions(versions_path)
data/tools/fido/toxml.py DELETED
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- """
5
- FIDO CSV output to XML.
6
-
7
- Author: Maurice de Rooij <maurice.de.rooij@nationaalarchief.nl>, September 2011
8
-
9
- Usage in combination with FIDO:
10
- - Windows: python fido.py [ARGS] | python toxml.py > output.xml
11
- - Linux: fido.py [ARGS] | toxml.py > output.xml
12
-
13
- Usage afterwards:
14
- - Windows: type output.csv | toxml.py > output.xml
15
- - Linux: cat output.csv | toxml.py > output.xml
16
-
17
- For difference in usage, see:
18
- - http://bugs.python.org/issue9390
19
- - http://support.microsoft.com/default.aspx?kbid=321788
20
- """
21
-
22
- from __future__ import absolute_import
23
-
24
- import csv
25
- import sys
26
-
27
- from . import __version__
28
- from .pronomutils import get_local_pronom_versions
29
-
30
-
31
def main():
    """
    Generate XML as read from CSV and send it to the standard output stream.

    Reads FIDO's CSV output from standard input and writes one `<file>`
    element per non-empty CSV row, preceded by a `<versions>` header.
    """
    # Imported locally so this function carries its own dependency.
    from xml.sax.saxutils import escape

    sys.stdout.write("""<?xml version="1.0" encoding="utf-8"?>
<fido_output>
    <versions>
        <fido_version>{0}</fido_version>
        <signature_version>{1}</signature_version>
    </versions>""".format(__version__, get_local_pronom_versions().pronom_version))

    reader = csv.reader(sys.stdin)

    for row in reader:
        if not row:
            # csv.reader yields an empty list for blank input lines; there
            # is nothing to report for them.
            continue
        # File names and format names may contain '&', '<' or '>', which
        # would otherwise produce XML that is not well formed.
        row = [escape(cell) for cell in row]
        sys.stdout.write("""
    <file>
        <filename>{0}</filename>
        <status>{1}</status>
        <matchtype>{2}</matchtype>
        <time>{3}</time>
        <puid>{4}</puid>
        <mimetype>{5}</mimetype>
        <formatname>{6}</formatname>
        <signaturename>{7}</signaturename>
        <filesize>{8}</filesize>
    </file>""".format(row[6], row[0], row[8], row[1], row[2], row[7], row[3], row[4], row[5]))

    sys.stdout.write("\n</fido_output>\n")


if __name__ == '__main__':
    main()
@@ -1,183 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- """
5
- FIDO SIGNATURE UPDATER.
6
-
7
- Open Planets Foundation (http://www.openplanetsfoundation.org)
8
- See License.txt for license information.
9
- Download from: https://github.com/openplanets/fido/releases
10
- Author: Maurice de Rooij (NANETH), 2012
11
-
12
- FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
13
- PRONOM is available from http://www.nationalarchives.gov.uk/pronom/.
14
- """
15
-
16
- from __future__ import print_function
17
-
18
- from argparse import ArgumentParser
19
- import os
20
- from shutil import rmtree
21
- import sys
22
- import time
23
- from xml.etree import ElementTree as CET
24
- import zipfile
25
-
26
- from six.moves.urllib.request import urlopen
27
-
28
- from . import __version__, CONFIG_DIR, query_yes_no
29
- from .prepare import run as prepare_pronom_to_fido
30
- from .pronomutils import check_well_formedness, get_local_pronom_versions, get_pronom_signature
31
-
32
-
33
# Fixed file names and file-name templates used by run(); the `{0}`
# placeholders are filled with the PRONOM signature version number.
defaults = {
    'signatureFileName': 'DROID_SignatureFile-v{0}.xml',
    'pronomZipFileName': 'pronom-xml-v{0}.zip',
    'fidoSignatureVersion': 'format_extensions.xml',
    # NOTE(review): confirm this name matches the container signature file
    # actually shipped in the conf directory.
    'containerVersion': 'container-signature-20160121.xml',  # container version is frozen and needs human attention before updating,
}

# Runtime options; main() uses these as the command-line argument defaults.
options = {
    'http_throttle': 0.5,  # in secs, to prevent DoS of PRONOM server
    'tmp_dir': os.path.join(CONFIG_DIR, 'tmp'),
    'deleteTempDirectory': True,
}
45
-
46
-
47
def run(defaults=defaults):
    """
    Update PRONOM signatures.

    Interactive script, requires keyboard input.

    :param defaults: mapping of settings. Keys missing from it (such as
        'tmp_dir', 'http_throttle' and 'deleteTempDirectory') are taken
        from the module-level `options`.
    """
    # main() passes a merged mapping, but a bare run() call receives only
    # the module-level `defaults`, which lacks the runtime options; layer
    # the given mapping over `options` so every key is always available.
    settings = dict(options)
    settings.update(defaults)

    print("FIDO signature updater v{}".format(__version__))

    try:
        print("Contacting PRONOM...")
        currentVersion = get_pronom_signature("version")
        if not currentVersion:
            sys.exit('Failed to obtain PRONOM signature file version number, please try again.')

        print("Querying latest signaturefile version...")
        signatureFile = os.path.join(CONFIG_DIR, settings['signatureFileName'].format(currentVersion))
        if os.path.isfile(signatureFile):
            print("You already have the latest PRONOM signature file, version", currentVersion)
            if not query_yes_no("Update anyway?"):
                sys.exit('Aborting update...')

        print("Downloading signature file version {}...".format(currentVersion))
        currentFile = get_pronom_signature("file")
        if not currentFile:
            sys.exit('Failed to obtain PRONOM signature file, please try again.')
        print("Writing {0}...".format(settings['signatureFileName'].format(currentVersion)))
        with open(signatureFile, 'wb') as file_:
            # The handle is binary: encode text before writing it.
            if not isinstance(currentFile, bytes):
                currentFile = currentFile.encode('utf-8')
            file_.write(currentFile)

        print("Extracting PRONOM PUID's from signature file...")
        tree = CET.parse(signatureFile)
        puids = [
            node.get("PUID")
            for node in tree.iter("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat")
        ]
        numberPuids = len(puids)
        print("Found {} PRONOM PUID's".format(numberPuids))

        print("Downloading signatures can take a while")
        if not query_yes_no("Continue and download signatures?"):
            sys.exit('Aborting update...')
        tmpdir = settings['tmp_dir']
        # Must be bound on every path: the download loop reads it even when
        # the temporary folder has just been created.
        resume_download = False
        if os.path.isdir(tmpdir):
            print("Found previously created temporary folder for download:", tmpdir)
            resume_download = query_yes_no('Do you want to resume download (yes) or start over (no)?')
            if resume_download:
                print("Resuming download...")
        else:
            print("Creating temporary folder for download:", tmpdir)
            try:
                os.mkdir(tmpdir)
            except OSError:
                # Best effort; the check below reports the failure.
                pass
        if not os.path.isdir(tmpdir):
            print("Failed to create temporary folder for PUID's, using", tmpdir)

        print("Downloading signatures, one moment please...")
        one_percent = (float(numberPuids) / 100)
        numfiles = 0
        for puid in puids:
            puidType, puidNum = puid.split("/")
            puidFileName = "puid." + puidType + "." + puidNum + ".xml"
            filename = os.path.join(tmpdir, puidFileName)
            if resume_download and os.path.isfile(filename) and check_well_formedness(filename):
                numfiles += 1
                continue
            puid_url = "http://www.nationalarchives.gov.uk/pronom/{}.xml".format(puid)
            try:
                filehandle = urlopen(puid_url)
            except Exception as e:
                print("Failed to download signature file:", puid_url)
                print("Error:", str(e))
                sys.exit('Please restart and resume download.')
            try:
                with open(filename, 'wb') as file_:
                    file_.write(filehandle.read())
            finally:
                # Release the connection even if writing the file fails.
                filehandle.close()
            if not check_well_formedness(filename):
                os.unlink(filename)
                continue
            numfiles += 1
            percent = int(float(numfiles) / one_percent)
            print(r"{}/{} files [{}%]".format(numfiles, numberPuids, percent))
            time.sleep(settings['http_throttle'])
        print("100%")

        print("Creating PRONOM zip...")
        compression = zipfile.ZIP_DEFLATED if 'zlib' in sys.modules else zipfile.ZIP_STORED
        modes = {zipfile.ZIP_DEFLATED: 'deflated', zipfile.ZIP_STORED: 'stored'}
        zip_path = os.path.join(CONFIG_DIR, settings['pronomZipFileName'].format(currentVersion))
        # The context manager finalises the archive even if adding a file fails.
        with zipfile.ZipFile(zip_path, mode='w') as zf:
            print("Adding files with compression mode", modes[compression])
            for puid in puids:
                puidType, puidNum = puid.split("/")
                puidFileName = "puid.{}.{}.xml".format(puidType, puidNum)
                filename = os.path.join(tmpdir, puidFileName)
                if os.path.isfile(filename):
                    zf.write(filename, arcname=puidFileName, compress_type=compression)
                    if settings['deleteTempDirectory']:
                        os.unlink(filename)

        if settings['deleteTempDirectory']:
            print("Deleting temporary folder and files...")
            rmtree(tmpdir, ignore_errors=True)

        print('Updating versions.xml...')
        versions = get_local_pronom_versions()
        versions.pronom_version = str(currentVersion)
        versions.pronom_signature = "formats-v" + str(currentVersion) + ".xml"
        versions.pronom_container_signature = settings['containerVersion']
        versions.fido_extension_signature = settings['fidoSignatureVersion']
        versions.update_script = __version__
        versions.write()

        # TODO: there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
        print("Preparing to convert PRONOM formats to FIDO signatures...")
        prepare_pronom_to_fido()
        print("FIDO signatures successfully updated")

    except KeyboardInterrupt:
        sys.exit('Aborting update...')
167
-
168
-
169
def main():
    """
    Main CLI entrypoint.

    Parses the command line, overlays the parsed values on a copy of the
    module-level `defaults`, and hands the merged settings to `run`.
    """
    # One (flag, keyword arguments) pair per command-line option.
    cli_arguments = (
        ('-tmpdir', dict(
            default=options['tmp_dir'],
            help='Location to store temporary files',
            dest='tmp_dir')),
        ('-keep_tmp', dict(
            default=options['deleteTempDirectory'],
            help='Do not delete temporary files after completion',
            dest='deleteTempDirectory',
            action='store_false')),
        ('-http_throttle', dict(
            default=options['http_throttle'],
            help='Time (in seconds) to wait between downloads',
            type=float,
            dest='http_throttle')),
    )
    parser = ArgumentParser(description='Download and convert the latest PRONOM signatures')
    for flag, spec in cli_arguments:
        parser.add_argument(flag, **spec)

    merged = dict(defaults)
    merged.update(vars(parser.parse_args()))
    run(merged)


if __name__ == '__main__':
    main()