pymetadata 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pymetadata might be problematic. Click here for more details.

Files changed (42)
  1. pymetadata/__init__.py +14 -0
  2. pymetadata/cache.py +52 -0
  3. pymetadata/chebi.py +92 -0
  4. pymetadata/console.py +18 -0
  5. pymetadata/core/__init__.py +1 -0
  6. pymetadata/core/annotation.py +396 -0
  7. pymetadata/core/creator.py +46 -0
  8. pymetadata/core/synonym.py +12 -0
  9. pymetadata/core/xref.py +66 -0
  10. pymetadata/examples/__init__.py +1 -0
  11. pymetadata/examples/cache_path_example.py +15 -0
  12. pymetadata/examples/omex_example.py +46 -0
  13. pymetadata/examples/results/test_from_files.omex +0 -0
  14. pymetadata/examples/results/test_from_omex.omex +0 -0
  15. pymetadata/examples/results/testomex/README.md +3 -0
  16. pymetadata/examples/results/testomex/manifest.xml +9 -0
  17. pymetadata/examples/results/testomex/models/omex_comp.xml +174 -0
  18. pymetadata/examples/results/testomex/models/omex_comp_flat.xml +215 -0
  19. pymetadata/examples/results/testomex/models/omex_minimal.xml +99 -0
  20. pymetadata/examples/test.omex +0 -0
  21. pymetadata/identifiers/__init__.py +1 -0
  22. pymetadata/identifiers/miriam.py +43 -0
  23. pymetadata/identifiers/registry.py +397 -0
  24. pymetadata/log.py +29 -0
  25. pymetadata/metadata/__init__.py +6 -0
  26. pymetadata/metadata/eco.py +15918 -0
  27. pymetadata/metadata/kisao.py +2731 -0
  28. pymetadata/metadata/sbo.py +3754 -0
  29. pymetadata/omex.py +771 -0
  30. pymetadata/omex_v2.py +30 -0
  31. pymetadata/ontologies/__init__.py +1 -0
  32. pymetadata/ontologies/ols.py +214 -0
  33. pymetadata/ontologies/ontology.py +312 -0
  34. pymetadata/py.typed +0 -0
  35. pymetadata/resources/chebi_webservice_wsdl.xml +509 -0
  36. pymetadata/resources/ontologies/README.md +4 -0
  37. pymetadata/resources/templates/ontology_enum.pytemplate +61 -0
  38. pymetadata/unichem.py +190 -0
  39. pymetadata-0.5.0.dist-info/METADATA +154 -0
  40. pymetadata-0.5.0.dist-info/RECORD +42 -0
  41. pymetadata-0.5.0.dist-info/WHEEL +4 -0
  42. pymetadata-0.5.0.dist-info/licenses/LICENSE +7 -0
pymetadata/omex.py ADDED
@@ -0,0 +1,771 @@
1
+ """
2
+ COMBINE Archive support.
3
+
4
+ This module provides an abstraction around the COMBINE archive. Common operations
5
+ such as archive creation, archive extraction, creating archives from entries or
6
+ directories, working with the `manifest.xml` are implemented.
7
+
8
+ When working with COMBINE archives these wrapper functions should be used.
9
+ The current version has no support for metadata manipulation.
10
+
11
+ Encrypted archives can be opened, but no support for encrypting archives yet.
12
+ """
13
+
14
+ import os
15
+ import pprint
16
+ import shutil
17
+ import tempfile
18
+ import zipfile
19
+ from enum import Enum
20
+ from pathlib import Path
21
+ from typing import Dict, List, Optional
22
+
23
+ import xmltodict
24
+ from pydantic import BaseModel, PrivateAttr
25
+
26
+ from pymetadata import log
27
+
28
+
29
# Module-level logger obtained from the package's logging helper.
logger = log.get_logger(__name__)

# Public names of this module.
__all__ = [
    "EntryFormat",
    "ManifestEntry",
    "Manifest",
    "Omex",
]

# URI prefixes from which the `EntryFormat` values below are assembled.
IDENTIFIERS_PREFIX = "http://identifiers.org/combine.specifications/"
PURL_PREFIX = "https://purl.org/NET/mediatypes/"
37
+
38
+
39
class EntryFormat(str, Enum):
    """Enum for common formats.

    Every value is a full format URI, built either from `IDENTIFIERS_PREFIX`
    (COMBINE specifications) or from `PURL_PREFIX` (media types).

    Several names map to the same URI (for instance `XML`, `RDF` and `OWL`).
    With `Enum` the first name defined for a value is the canonical member and
    later names with the same value are aliases of it; all names remain
    reachable via attribute access and `EntryFormat.__members__`.
    """

    OMEX = IDENTIFIERS_PREFIX + "omex"
    OMEX_MANIFEST = IDENTIFIERS_PREFIX + "omex-manifest"
    OMEX_METADATA = IDENTIFIERS_PREFIX + "omex-metadata"

    SBML = IDENTIFIERS_PREFIX + "sbml"
    # These two were previously written as one-element tuples (stray trailing
    # comma). The `str` mix-in unpacked the tuple, so the member value is the
    # same string as before; the plain string form just says so directly.
    SBML_L1V1 = IDENTIFIERS_PREFIX + "sbml.level-1.version-1"
    SBML_L1V2 = IDENTIFIERS_PREFIX + "sbml.level-1.version-2"
    SBML_L2V1 = IDENTIFIERS_PREFIX + "sbml.level-2.version-1"
    SBML_L2V2 = IDENTIFIERS_PREFIX + "sbml.level-2.version-2"
    SBML_L2V3 = IDENTIFIERS_PREFIX + "sbml.level-2.version-3"
    SBML_L2V4 = IDENTIFIERS_PREFIX + "sbml.level-2.version-4"
    SBML_L2V5 = IDENTIFIERS_PREFIX + "sbml.level-2.version-5"
    SBML_L3V1 = IDENTIFIERS_PREFIX + "sbml.level-3.version-1"
    SBML_L3V2 = IDENTIFIERS_PREFIX + "sbml.level-3.version-2"

    SEDML = IDENTIFIERS_PREFIX + "sed-ml"
    SEDML_L1V1 = IDENTIFIERS_PREFIX + "sed-ml.level-1.version-1"
    SEDML_L1V2 = IDENTIFIERS_PREFIX + "sed-ml.level-1.version-2"
    SEDML_L1V3 = IDENTIFIERS_PREFIX + "sed-ml.level-1.version-3"
    SEDML_L1V4 = IDENTIFIERS_PREFIX + "sed-ml.level-1.version-4"

    BIOPAX = IDENTIFIERS_PREFIX + "biopax"
    CELLML = IDENTIFIERS_PREFIX + "cellml"
    SBGN = IDENTIFIERS_PREFIX + "sbgn"
    SBGN_PD = IDENTIFIERS_PREFIX + "sbgn.pd"

    FROG_JSON_V1 = IDENTIFIERS_PREFIX + "frog-json-version-1"
    FROG_METADATA_V1 = IDENTIFIERS_PREFIX + "frog-metadata-version-1"
    FROG_OBJECTIVE_V1 = IDENTIFIERS_PREFIX + "frog-objective-version-1"
    FROG_FVA_V1 = IDENTIFIERS_PREFIX + "frog-fva-version-1"
    FROG_GENEDELETION_V1 = IDENTIFIERS_PREFIX + "frog-genedeletion-version-1"
    FROG_REACTIONDELETION_V1 = IDENTIFIERS_PREFIX + "frog-reactiondeletion-version-1"

    MARKDOWN = PURL_PREFIX + "text/x-markdown"
    PLAIN = PURL_PREFIX + "text/plain"
    XML = PURL_PREFIX + "application/xml"
    RDF = PURL_PREFIX + "application/xml"
    OWL = PURL_PREFIX + "application/xml"
    SCI = PURL_PREFIX + "application/x-scilab"
    XPP = PURL_PREFIX + "text/plain"

    COPASI = PURL_PREFIX + "application/x-copasi"
    SEDX = PURL_PREFIX + "application/x-sed-ml-archive"
    PNG = PURL_PREFIX + "image/png"
    CSV = PURL_PREFIX + "text/csv"
    H323 = PURL_PREFIX + "text/h323"
    ACX = PURL_PREFIX + "application/internet-property-stream"
    AI = PURL_PREFIX + "application/postscript"
    AIF = PURL_PREFIX + "audio/x-aiff"
    AIFC = PURL_PREFIX + "audio/x-aiff"
    AIFF = PURL_PREFIX + "audio/x-aiff"
    ASF = PURL_PREFIX + "video/x-ms-asf"
    ASR = PURL_PREFIX + "video/x-ms-asf"
    ASX = PURL_PREFIX + "video/x-ms-asf"
    AU = PURL_PREFIX + "audio/basic"
    AVI = PURL_PREFIX + "video/x-msvideo"
    AXS = PURL_PREFIX + "application/olescript"
    BAS = PURL_PREFIX + "text/plain"
    BCPIO = PURL_PREFIX + "application/x-bcpio"
    BIN = PURL_PREFIX + "application/octet-stream"
    BMP = PURL_PREFIX + "image/bmp"
    C = PURL_PREFIX + "text/plain"
    CAT = PURL_PREFIX + "application/vnd.ms-pkiseccat"
    CDF = PURL_PREFIX + "application/x-cdf"
    CER = PURL_PREFIX + "application/x-x509-ca-cert"
    CLP = PURL_PREFIX + "application/x-msclip"
    CMX = PURL_PREFIX + "image/x-cmx"
    COD = PURL_PREFIX + "image/cis-cod"
    CPIO = PURL_PREFIX + "application/x-cpio"
    CRD = PURL_PREFIX + "application/x-mscardfile"
    CRL = PURL_PREFIX + "application/pkix-crl"
    CRT = PURL_PREFIX + "application/x-x509-ca-cert"
    CSH = PURL_PREFIX + "application/x-csh"
    CSS = PURL_PREFIX + "text/css"
    DCR = PURL_PREFIX + "application/x-director"
    DER = PURL_PREFIX + "application/x-x509-ca-cert"
    DIR = PURL_PREFIX + "application/x-director"
    DLL = PURL_PREFIX + "application/x-msdownload"
    DMS = PURL_PREFIX + "application/octet-stream"
    DOC = PURL_PREFIX + "application/msword"
    DOT = PURL_PREFIX + "application/msword"
    DVI = PURL_PREFIX + "application/x-dvi"
    DXR = PURL_PREFIX + "application/x-director"
    EPS = PURL_PREFIX + "application/postscript"
    ETX = PURL_PREFIX + "text/x-setext"
    EVY = PURL_PREFIX + "application/envoy"
    EXE = PURL_PREFIX + "application/octet-stream"
    FIF = PURL_PREFIX + "application/fractals"
    FLR = PURL_PREFIX + "x-world/x-vrml"
    GIF = PURL_PREFIX + "image/gif"
    GTAR = PURL_PREFIX + "application/x-gtar"
    GZ = PURL_PREFIX + "application/x-gzip"
    H = PURL_PREFIX + "text/plain"
    HDF = PURL_PREFIX + "application/x-hdf"
    H5 = PURL_PREFIX + "application/x-hdf"
    HLP = PURL_PREFIX + "application/winhlp"
    HQT = PURL_PREFIX + "application/mac-binhex40"
    HTA = PURL_PREFIX + "application/hta"
    HTC = PURL_PREFIX + "text/x-component"
    HTM = PURL_PREFIX + "text/html"
    HTML = PURL_PREFIX + "text/html"
    HTT = PURL_PREFIX + "text/webviewhtml"
    ICO = PURL_PREFIX + "image/x-icon"
    IEF = PURL_PREFIX + "image/ief"
    III = PURL_PREFIX + "application/x-iphone"
    INS = PURL_PREFIX + "application/x-internet-signup"
    ISP = PURL_PREFIX + "application/x-internet-signup"
    JFIF = PURL_PREFIX + "image/pipeg"
    JPE = PURL_PREFIX + "image/jpeg"
    JPEG = PURL_PREFIX + "image/jpeg"
    JPG = PURL_PREFIX + "image/jpeg"
    JS = PURL_PREFIX + "application/x-javascript"
    LATEX = PURL_PREFIX + "application/x-latex"
    LHA = PURL_PREFIX + "application/octet-stream"
    LSF = PURL_PREFIX + "video/x-la-asf"
    LSX = PURL_PREFIX + "video/x-la-asf"
    LZH = PURL_PREFIX + "application/octet-stream"
    M = PURL_PREFIX + "application/x-matlab"
    MAT = PURL_PREFIX + "application/x-matlab-data"
    M13 = PURL_PREFIX + "application/x-msmediaview"
    M14 = PURL_PREFIX + "application/x-msmediaview"
    M3U = PURL_PREFIX + "audio/x-mpegurl"
    MAN = PURL_PREFIX + "application/x-troff-man"
    MDB = PURL_PREFIX + "application/x-msaccess"
    ME = PURL_PREFIX + "application/x-troff-me"
    MHT = PURL_PREFIX + "message/rfc822"
    MHTML = PURL_PREFIX + "message/rfc822"
    MID = PURL_PREFIX + "audio/mid"
    MNY = PURL_PREFIX + "application/x-msmoney"
    MOV = PURL_PREFIX + "video/quicktime"
    MOVIE = PURL_PREFIX + "video/x-sgi-movie"
    MP2 = PURL_PREFIX + "video/mpeg"
    MP3 = PURL_PREFIX + "audio/mpeg"
    MP4 = PURL_PREFIX + "video/mpeg"
    MPE = PURL_PREFIX + "video/mpeg"
    MPEG = PURL_PREFIX + "video/mpeg"
    MPG = PURL_PREFIX + "video/mpeg"
    MPP = PURL_PREFIX + "application/vnd.ms-project"
    MPV2 = PURL_PREFIX + "video/mpeg"
    MS = PURL_PREFIX + "application/x-troff-ms"
    MVB = PURL_PREFIX + "application/x-msmediaview"
    NWS = PURL_PREFIX + "message/rfc822"
    ODA = PURL_PREFIX + "application/oda"
    P10 = PURL_PREFIX + "application/pkcs10"
    P12 = PURL_PREFIX + "application/x-pkcs12"
    P7B = PURL_PREFIX + "application/x-pkcs7-certificates"
    P7C = PURL_PREFIX + "application/x-pkcs7-mime"
    P7M = PURL_PREFIX + "application/x-pkcs7-mime"
    P7R = PURL_PREFIX + "application/x-pkcs7-certreqresp"
    P7S = PURL_PREFIX + "application/x-pkcs7-signature"
    PBM = PURL_PREFIX + "image/x-portable-bitmap"
    PDF = PURL_PREFIX + "application/pdf"
    PFX = PURL_PREFIX + "application/x-pkcs12"
    PGM = PURL_PREFIX + "image/x-portable-graymap"
    PKO = PURL_PREFIX + "application/ynd.ms-pkipko"
    PMA = PURL_PREFIX + "application/x-perfmon"
    PMC = PURL_PREFIX + "application/x-perfmon"
    PML = PURL_PREFIX + "application/x-perfmon"
    PMR = PURL_PREFIX + "application/x-perfmon"
    PMW = PURL_PREFIX + "application/x-perfmon"
    PNW = PURL_PREFIX + "image/x-portable-anymap"
    POT = PURL_PREFIX + "application/vnd.ms-powerpoint"
    PPM = PURL_PREFIX + "image/x-portable-pixmap"
    PPS = PURL_PREFIX + "application/vnd.ms-powerpoint"
    PPT = PURL_PREFIX + "application/vnd.ms-powerpoint"
    PRF = PURL_PREFIX + "application/pics-rules"
    PS = PURL_PREFIX + "application/postscript"
    PUB = PURL_PREFIX + "application/x-mspublisher"
    QT = PURL_PREFIX + "video/quicktime"
    RA = PURL_PREFIX + "audio/x-pn-realaudio"
    RAM = PURL_PREFIX + "audio/x-pn-realaudio"
    RAS = PURL_PREFIX + "image/x-cmu-raster"
    RGB = PURL_PREFIX + "image/x-rgb"
    RMI = PURL_PREFIX + "audio/mid"
    ROFF = PURL_PREFIX + "application/x-troff"
    RTF = PURL_PREFIX + "application/rtf"
    RTX = PURL_PREFIX + "text/richtext"
    SCD = PURL_PREFIX + "application/x-msschedule"
    SCT = PURL_PREFIX + "text/scriptlet"
    SETPAY = PURL_PREFIX + "application/set-payment-initiation"
    SETREG = PURL_PREFIX + "application/set-registration-initiation"
    SH = PURL_PREFIX + "application/x-sh"
    SHAR = PURL_PREFIX + "application/x-shar"
    SIT = PURL_PREFIX + "application/x-stuffit"
    SND = PURL_PREFIX + "audio/basic"
    SPC = PURL_PREFIX + "application/x-pkcs7-certificates"
    SPL = PURL_PREFIX + "application/futuresplash"
    SRC = PURL_PREFIX + "application/x-wais-source"
    SST = PURL_PREFIX + "application/vnd.ms-pkicertstore"
    STL = PURL_PREFIX + "application/vnd.ms-pkistl"
    STM = PURL_PREFIX + "text/html"
    SVG = PURL_PREFIX + "image/svg+xml"
    SV4CPIO = PURL_PREFIX + "application/x-sv4cpio"
    SV4CRC = PURL_PREFIX + "application/x-sv4crc"
    SWF = PURL_PREFIX + "application/x-shockwave-flash"
    T = PURL_PREFIX + "application/x-troff"
    TAR = PURL_PREFIX + "application/x-tar"
    TCL = PURL_PREFIX + "application/x-tcl"
    TEX = PURL_PREFIX + "application/x-tex"
    TEXI = PURL_PREFIX + "application/x-texinfo"
    TEXINFO = PURL_PREFIX + "application/x-texinfo"
    TGZ = PURL_PREFIX + "application/x-compressed"
    TIF = PURL_PREFIX + "image/tiff"
    TIFF = PURL_PREFIX + "image/tiff"
    TR = PURL_PREFIX + "application/x-troff"
    TRM = PURL_PREFIX + "application/x-msterminal"
    TSV = PURL_PREFIX + "text/tab-separated-values"
    TXT = PURL_PREFIX + "text/plain"
    ULS = PURL_PREFIX + "text/iuls"
    USTAR = PURL_PREFIX + "application/x-ustar"
    VCF = PURL_PREFIX + "text/x-vcard"
    VRML = PURL_PREFIX + "x-world/x-vrml"
    WAV = PURL_PREFIX + "audio/x-wav"
    WCM = PURL_PREFIX + "application/vnd.ms-works"
    WDB = PURL_PREFIX + "application/vnd.ms-works"
    WKS = PURL_PREFIX + "application/vnd.ms-works"
    WMF = PURL_PREFIX + "application/x-msmetafile"
    WPS = PURL_PREFIX + "application/vnd.ms-works"
    WRI = PURL_PREFIX + "application/x-mswrite"
    WRL = PURL_PREFIX + "x-world/x-vrml"
    WRZ = PURL_PREFIX + "x-world/x-vrml"
    XAF = PURL_PREFIX + "x-world/x-vrml"
    XBM = PURL_PREFIX + "image/x-xbitmap"
    XLA = PURL_PREFIX + "application/vnd.ms-excel"
    XLC = PURL_PREFIX + "application/vnd.ms-excel"
    XLM = PURL_PREFIX + "application/vnd.ms-excel"
    XLS = PURL_PREFIX + "application/vnd.ms-excel"
    XLT = PURL_PREFIX + "application/vnd.ms-excel"
    XLW = PURL_PREFIX + "application/vnd.ms-excel"
    XOF = PURL_PREFIX + "x-world/x-vrml"
    XPM = PURL_PREFIX + "image/x-xpixmap"
    XWD = PURL_PREFIX + "image/x-xwindowdump"
    YAML = PURL_PREFIX + "text/yaml"
    Z = PURL_PREFIX + "application/x-compress"
    ZIP = PURL_PREFIX + "application/zip"
277
+
278
+
279
class ManifestEntry(BaseModel):
    """Entry of an OMEX file listed in the `manifest.xml`.

    This corresponds to a single file in the archive which is tracked in the
    manifest.xml.

    Fields:
        location: location of the entry
        format: full format string
        master: master attribute
    """

    location: str
    format: str
    master: bool = False

    # pydantic configuration
    model_config = {
        "use_enum_values": True,
    }

    @staticmethod
    def is_format(format_key: str, format: str) -> bool:
        """Check if the format string is of the given format_key.

        The keys "sbml", "sedml" and "sbgn" match a whole family of format
        strings by substring (any level/version, with either '/' or ':' after
        `combine.specifications`). Every other key is looked up
        case-insensitively as a member name of `EntryFormat` and must equal
        the member's value exactly.

        :param format_key: short key such as "sbml", "cellml" or "csv"
        :param format: full format string of an entry
        :return: True if `format` belongs to `format_key`, False otherwise
                 (including unknown keys)
        """
        # FIXME: use regular expressions
        family_tokens = {"sbml": "sbml", "sedml": "sed", "sbgn": "sbgn"}
        token = family_tokens.get(format_key)
        if token is not None:
            return any(
                f"identifiers.org/combine.specifications{sep}{token}" in format
                for sep in ("/", ":")
            )

        # Look the key up among the enum member names only. The previous
        # implementation tested `hasattr` with the raw key but fetched the
        # upper-cased one, and compared `str(member)` ("EntryFormat.NAME")
        # instead of the member value, so this branch could never match.
        member = EntryFormat.__members__.get(format_key.upper())
        if member is None:
            return False
        return str(member.value) == format

    def is_sbml(self) -> bool:
        """Check if entry is SBML."""
        return ManifestEntry.is_format("sbml", self.format)

    def is_sedml(self) -> bool:
        """Check if entry is SED-ML."""
        return ManifestEntry.is_format("sedml", self.format)

    def is_sbgn(self) -> bool:
        """Check if entry is SBGN."""
        return ManifestEntry.is_format("sbgn", self.format)
332
+
333
+
334
class Manifest(BaseModel):
    """COMBINE archive manifest.

    A manifest is a list of ManifestEntries. In addition to the `entries`
    list a private dictionary keyed by location is maintained so that entries
    can be looked up via `location in manifest` and `manifest[location]`.
    """

    # location -> entry index; rebuilt in `__init__`, kept in sync by
    # `add_entry` and `remove_entry_for_location`
    _entries_dict: Dict[str, ManifestEntry] = PrivateAttr()
    # default content: the archive itself and its manifest
    entries: List[ManifestEntry] = [
        ManifestEntry(location=".", format=EntryFormat.OMEX),
        ManifestEntry(
            location="./manifest.xml",
            format=EntryFormat.OMEX_MANIFEST,
        ),
    ]

    def __init__(self, **data) -> None:  # type: ignore
        """Initialize Manifest.

        Locations which are neither '.' nor prefixed with './' are rewritten
        with a './' prefix (with a warning), using the same prefix test as
        `_check_and_normalize_location`.
        """
        super().__init__(**data)
        for e in self.entries:
            if e.location != "." and not e.location.startswith("./"):
                logger.warning(
                    f"Relative location paths must start with './', but '{e.location}'."
                )
                e.location = f"./{e.location}"
        self._entries_dict = {e.location: e for e in self.entries}

    def __contains__(self, location: str) -> bool:
        """Check if location is in manifest."""
        return location in self._entries_dict

    def __getitem__(self, location: str) -> ManifestEntry:
        """Get entry by location.

        :raises KeyError: if no entry exists for the location
        """
        return self._entries_dict[location]

    def __len__(self) -> int:
        """Get number of entries."""
        return len(self.entries)

    @classmethod
    def from_manifest(cls, manifest_path: Path) -> "Manifest":
        """Create manifest from existing manifest.xml file.

        :param manifest_path: path to the manifest.xml
        :return: Manifest with one entry per `content` element
        :raises KeyError: if the document has no `omexManifest` root
        """
        # the manifest is written as UTF-8 (see `to_manifest`), so read it as such
        with open(manifest_path, "r", encoding="utf-8") as f_manifest:
            xml = f_manifest.read()
        d = xmltodict.parse(xml)

        # An empty root element is parsed to None; a single child element is
        # parsed to a dict instead of a list of dicts.
        root = d["omexManifest"] or {}
        content = root.get("content") or []
        if isinstance(content, dict):
            content = [content]

        # attributes have @ prefix
        entries = [{k.replace("@", ""): v for (k, v) in e.items()} for e in content]

        return Manifest(entries=entries)

    def to_manifest_xml(self) -> str:
        """Create xml of manifest.

        :return: XML document with one `content` element per entry
        """
        # local import: only needed for serialization
        from xml.sax.saxutils import quoteattr

        def content_line(e: ManifestEntry) -> str:
            master_token = ' master="true"' if e.master else ' master="false"'
            # quoteattr escapes '&', '<', '>' and quotes so that arbitrary
            # locations still yield well-formed XML
            return (
                f" <content location={quoteattr(e.location)} "
                f"format={quoteattr(e.format)}{master_token} />"
            )

        lines = (
            [
                '<?xml version="1.0" encoding="UTF-8"?>',
                '<omexManifest xmlns="http://identifiers.org/combine.specifications/omex-manifest">',
            ]
            + [content_line(e) for e in self.entries]
            + ["</omexManifest>"]
        )
        return "\n".join(lines)

    def to_manifest(self, manifest_path: Path) -> None:
        """Write manifest.xml.

        :param manifest_path: path of the file to write
        """
        # must match the encoding declared in the XML header
        with open(manifest_path, "w", encoding="utf-8") as f_manifest:
            f_manifest.write(self.to_manifest_xml())

    def add_entry(self, entry: ManifestEntry) -> None:
        """Add entry to manifest.

        The location of the given entry is normalized in place.
        Does not check for duplication.

        :raises ValueError: if the location starts with '/'
        """
        entry.location = self._check_and_normalize_location(entry.location)
        self.entries.append(entry)
        self._entries_dict[entry.location] = entry

    def remove_entry_for_location(self, location: str) -> Optional[ManifestEntry]:
        """Remove entry for given location.

        :param location: location of the entry (normalized before lookup)
        :return: the removed entry, or None if the location is a core
                 location ('.' or './manifest.xml') or does not exist
        :raises ValueError: if the location starts with '/'
        """
        location = self._check_and_normalize_location(location)

        if location in [".", "./manifest.xml"]:
            logger.error(
                f"Core location cannot be removed from manifest: '{location}'."
            )
            return None
        if location not in self:
            logger.error(f"The location '{location}' does not exist in manifest.")
            return None

        entry = self._entries_dict.pop(location)
        self.entries = [e for e in self.entries if e.location != location]
        return entry

    @staticmethod
    def _check_and_normalize_location(location: str) -> str:
        """Add relative prefix and check location.

        :param location: location to normalize
        :return: the location prefixed with './' ('.' is returned unchanged)
        :raises ValueError: if the location starts with '/'
        """
        if location.startswith("/"):
            raise ValueError(
                f"Locations must be relative paths in COMBINE archive, but location is "
                f"'{location}'."
            )

        # add prefix
        if not location.startswith("./") and location != ".":
            location = f"./{location}"
        return location
452
+
453
+
454
class Omex:
    """Combine archive class.

    The files of the archive are kept in a temporary working directory
    (`_tmp_dir`) together with a `Manifest` describing them. The working
    directory is removed by `__exit__`, i.e., when the instance is used as a
    context manager::

        with Omex.from_omex(path) as omex:
            ...
    """

    def __init__(self) -> None:
        """Create COMBINE Archive Version 1."""
        self.manifest: Manifest = Manifest()
        self._tmp_dir: Path = Path(tempfile.mkdtemp())

    def __enter__(self) -> "Omex":
        """Enter context; required so that `__exit__` is usable via `with`."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore
        """Cleanup on exit."""
        shutil.rmtree(self._tmp_dir)

    def __str__(self) -> str:
        """Get contents of archive string."""
        return pprint.pformat(self.manifest.entries, indent=4, compact=True)

    def get_path(self, location: str) -> Path:
        """Get path for given location.

        :param location: location as stored in the manifest
        :return: path of the entry in the working directory
        :raises KeyError: if the location is not in the manifest
        """
        # check that entry exists (raises KeyError)
        _ = self.manifest[location]
        return self._tmp_dir / location

    @staticmethod
    def _check_omex_path(omex_path: Path) -> Path:
        """Check that omex path exists and is a file.

        :param omex_path: path to check; a `str` is converted with a warning
        :return: the path as `Path`
        :raises ValueError: if the path does not exist or is not a file
        """
        if isinstance(omex_path, str):
            logger.warning(f"'omex_path' should be 'Path': '{omex_path}'")
            omex_path = Path(omex_path)

        if not omex_path.exists():
            raise ValueError(f"'omex_path' does not exist: '{omex_path}'.")
        if not omex_path.is_file():
            raise ValueError(f"'omex_path' is not a file: '{omex_path}'.")

        return omex_path

    @staticmethod
    def is_omex(omex_path: Path) -> bool:
        """Check if path is an omex archive.

        File must be a zip archive and contain a manifest.xml.

        :raises ValueError: if the path does not exist or is not a file
        """
        omex_path = Omex._check_omex_path(omex_path)

        if not zipfile.is_zipfile(str(omex_path)):
            logger.warning(f"Omex path '{omex_path}' is not a zip archive.")
            return False

        with zipfile.ZipFile(omex_path, mode="r") as zf:
            try:
                zf.getinfo("manifest.xml")
            except KeyError:
                # manifest does not exist in archive
                logger.warning(f"No 'manifest.xml' in '{omex_path}'.")
                return False
        return True

    @staticmethod
    def from_omex(omex_path: Path, password: Optional[bytes] = None) -> "Omex":
        """Read omex from given path.

        :param omex_path: path to omex archive
        :param password: password for encryption
        :return: Omex object
        :raises ValueError: if the path does not exist or is not a file
        """
        omex_path = Omex._check_omex_path(omex_path)

        # extract archive to tmp directory; the Omex created below copies the
        # files into its own working directory before this one is removed
        with tempfile.TemporaryDirectory() as tmp_dir:
            with zipfile.ZipFile(omex_path, "r") as zf:
                # Figure out algorithm:
                for info in zf.infolist():
                    if info.compress_type not in {
                        zipfile.ZIP_DEFLATED,
                        zipfile.ZIP_STORED,
                    }:
                        logger.warning(f"Unsupported compression for: '{info}'")
                # extract all files
                zf.extractall(tmp_dir, pwd=password)

            return Omex.from_directory(Path(tmp_dir))

    @classmethod
    def from_directory(cls, directory: Path) -> "Omex":
        """Create a COMBINE archive from a given directory.

        All files below the directory are added. If the directory contains a
        `manifest.xml`, its entries are reused for the files listed in it.
        For all other files the format is guessed, and the master attribute is
        set to True if the guessed format is SED-ML.

        :param directory: directory to read; a `str` is converted with a warning
        :return: Omex object
        :raises ValueError: if the directory does not exist or is not a directory
        """
        if isinstance(directory, str):
            logger.warning(f"'directory' should be 'Path': '{directory}'")
            directory = Path(directory)

        if not directory.exists():
            msg = f"'directory' does not exist: '{directory}'."
            logger.error(msg)
            raise ValueError(msg)

        if not directory.is_dir():
            msg = f"'directory' is not a directory: '{directory}'."
            logger.error(msg)
            raise ValueError(msg)

        manifest_path: Path = directory / "manifest.xml"
        manifest: Optional[Manifest] = None
        if manifest_path.exists():
            manifest = Manifest.from_manifest(manifest_path)
        else:
            logger.error(
                f"No 'manifest.xml' in directory: '{directory}'. Trying "
                f"to create manifest.xml."
            )

        # new archive
        omex = Omex()

        # iterate over all locations and add entry
        for root, _dirs, files in os.walk(str(directory)):
            for file in files:
                file_path = os.path.join(root, file)
                # locations always use '/' as separator, independent of the OS
                rel_path = os.path.relpath(file_path, directory).replace(os.sep, "/")
                location = f"./{rel_path}"
                if location == "./manifest.xml":
                    # manifest is created from the internal manifest entries
                    continue

                logger.debug(f"'{file_path}' -> '{location}'")
                entry: ManifestEntry
                if manifest and location in manifest:
                    # use entry from existing manifest
                    entry = manifest[location]
                else:
                    if manifest:
                        logger.warning(
                            f"Entry with location missing in manifest.xml: '{location}'"
                        )

                    entry_format = Omex.guess_format(Path(file_path))
                    master = bool(
                        entry_format
                        and ManifestEntry.is_format(
                            format_key="sedml", format=entry_format
                        )
                    )
                    entry = ManifestEntry(
                        location=location,
                        format=entry_format,
                        master=master,
                    )

                omex.add_entry(entry_path=Path(file_path), entry=entry)

        return omex

    def add_entry(self, entry_path: Path, entry: ManifestEntry) -> None:
        """Add a path to the combine archive.

        The corresponding ManifestEntry information is required.
        The entry is copied when getting added, i.e., changes to the location
        after adding an entry will not have any effect on the content in the
        archive!

        :param entry_path: existing file to copy into the archive
        :param entry: manifest entry describing the file; its location is
                      normalized in place
        :raises ValueError: if `entry_path` does not exist or is not a file,
                            or if the entry location starts with '/'
        """
        if isinstance(entry_path, str):
            logger.warning(f"'entry_path' should be 'Path': '{entry_path}'")
            entry_path = Path(entry_path)

        if not entry_path.exists():
            msg = f"'entry_path' does not exist: '{entry_path}'."
            logger.error(msg)
            raise ValueError(msg)

        if not entry_path.is_file():
            raise ValueError(f"'entry_path' is not a file: '{entry_path}'.")

        # Validate and normalize before anything touches the file system:
        # joining an absolute location onto the working directory would
        # otherwise copy the file outside of it, and an un-prefixed location
        # would not be recognized as a duplicate.
        location = Manifest._check_and_normalize_location(entry.location)

        if location in self.manifest:
            logger.warning(
                f"Location already exists and is overwritten: '{location}'."
            )
            self.manifest.remove_entry_for_location(location)

        # copy path
        destination = self._tmp_dir / location
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src=str(entry_path), dst=str(destination))

        # add entry
        self.manifest.add_entry(entry)

    def remove_entry_for_location(self, location: str) -> Optional[ManifestEntry]:
        """Remove entry and corresponding entry_path.

        :param location: location of the entry to remove
        :return: the removed entry or None if nothing was removed
        """
        entry = self.manifest.remove_entry_for_location(location)
        if entry:
            destination = self._tmp_dir / entry.location
            os.remove(destination)
        return entry

    def to_omex(
        self,
        omex_path: Path,
        password: Optional[str] = None,
        compression: int = zipfile.ZIP_DEFLATED,
        compresslevel: int = 9,
    ) -> None:
        """Write omex to path.

        By definition OMEX files should be zip deflated.

        The `compresslevel` parameter controls the compression level to use when
        writing files to the archive. When using `ZIP_STORED` or `ZIP_LZMA` it has no
        effect. When using `ZIP_DEFLATED` integers 0 through 9 are accepted
        (see zlib for more information). When using ZIP_BZIP2 integers 1 through 9
        are accepted (see bz2 for more information). The larger the value the better
        the compression.

        :param omex_path: path of the archive to write; an existing file is
                          overwritten (with a warning)
        :param password: currently unused, the archive is written unencrypted
        :param compression: compression algorithm
        :param compresslevel: level of compression
        :return:
        """
        if isinstance(omex_path, str):
            logger.warning(f"'omex_path' should be 'Path': '{omex_path}'")
            omex_path = Path(omex_path)

        if omex_path.exists():
            logger.warning(f"Existing omex is overwritten: '{omex_path}'")

        # write tmp dir
        with tempfile.TemporaryDirectory() as tmp_dir:
            self.to_directory(output_dir=Path(tmp_dir))

            # compress directory as zip
            with zipfile.ZipFile(
                omex_path,
                mode="w",
                compression=compression,
                compresslevel=compresslevel,
            ) as zf:
                for e in self.manifest.entries:
                    # '.' is the archive itself and has no file
                    if e.location != ".":
                        f = Path(tmp_dir) / e.location
                        zf.write(filename=str(f), arcname=e.location)

    def to_directory(self, output_dir: Path) -> None:
        """Extract combine archive to output directory.

        All entries are copied to the directory and a `manifest.xml` is
        written from the current manifest.

        :param output_dir: output directory, created if it does not exist
        :return:
        """
        if isinstance(output_dir, str):
            logger.warning(f"'output_dir' should be 'Path': '{output_dir}'")
            output_dir = Path(output_dir)

        if output_dir and not output_dir.exists():
            logger.warning(f"Creating working directory: {output_dir}")
            output_dir.mkdir(parents=True, exist_ok=True)

        # iterate over all locations and copy to destination
        for entry in self.manifest.entries:
            if entry.location in [".", "./manifest.xml"]:
                continue
            src = self._tmp_dir / entry.location
            destination = output_dir / entry.location
            destination.parent.mkdir(parents=True, exist_ok=True)
            logger.debug(f"'{src}' -> '{destination}'")
            shutil.copy2(src=str(src), dst=str(destination))

        # write manifest.xml
        self.manifest.to_manifest(manifest_path=output_dir / "manifest.xml")

    def entries_by_format(self, format_key: str) -> List[ManifestEntry]:
        """Get entries with given format in the archive."""
        return [
            entry
            for entry in self.manifest.entries
            if ManifestEntry.is_format(format_key, entry.format)
        ]

    @staticmethod
    def lookup_format(format_key: str) -> str:
        """Lookup format by format_key.

        :param format_key: name of an `EntryFormat` member (case-insensitive)
        :return: value of the member; for unknown keys an error is logged and
                 the media type URI for 'application/x.unknown' is returned
        """
        member = EntryFormat.__members__.get(format_key.upper())
        if member is not None:
            return str(member.value)

        logger.error(f"Unknown format_key: {format_key}")
        return PURL_PREFIX + "application/x.unknown"

    @staticmethod
    def guess_format(path: Path) -> str:
        """Guess format string for given file.

        Files with extension 'xml' are inspected: the first 256 characters are
        searched for SBML, SED-ML, CellML and COPASI markers. In all other
        cases the format is looked up by the file extension.

        If the format cannot be resolved the result of `lookup_format` for an
        unknown key is returned (the 'application/x.unknown' media type URI).
        """
        extension = path.suffix[1:] if path.suffix else ""
        if extension == "xml":
            markers = (
                ("<sbml", "sbml"),
                ("<sedML", "sedml"),
                ("<cell", "cellml"),
                ("<COPASI", "copasi"),
            )
            with open(path, "r") as f_in:
                try:
                    text = f_in.read(256)
                    for marker, format_key in markers:
                        if marker in text:
                            return Omex.lookup_format(format_key)
                except UnicodeDecodeError as err:
                    # handle incorrect encodings
                    logger.error(
                        f"UnicodeDecodeError in '{path}', "
                        f"incorrect file encoding: '{err}'"
                    )

        return Omex.lookup_format(extension)