MDAnalysisData 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. MDAnalysisData/CG_fiber.py +88 -0
  2. MDAnalysisData/PEG_1chain.py +88 -0
  3. MDAnalysisData/__init__.py +22 -0
  4. MDAnalysisData/adk_equilibrium.py +88 -0
  5. MDAnalysisData/adk_transitions.py +196 -0
  6. MDAnalysisData/authors.py +12 -0
  7. MDAnalysisData/base.py +232 -0
  8. MDAnalysisData/datasets.py +33 -0
  9. MDAnalysisData/descr/CG_fiber.rst +23 -0
  10. MDAnalysisData/descr/PEG_1chain.rst +23 -0
  11. MDAnalysisData/descr/adk_equilibrium.rst +39 -0
  12. MDAnalysisData/descr/adk_transitions_DIMS.rst +42 -0
  13. MDAnalysisData/descr/adk_transitions_FRODA.rst +42 -0
  14. MDAnalysisData/descr/ifabp_water.rst +40 -0
  15. MDAnalysisData/descr/membrane_peptide.rst +44 -0
  16. MDAnalysisData/descr/nhaa_equilibrium.rst +42 -0
  17. MDAnalysisData/descr/vesicle_lib.rst +46 -0
  18. MDAnalysisData/descr/yiip_equilibrium.rst +42 -0
  19. MDAnalysisData/ifabp_water.py +97 -0
  20. MDAnalysisData/membrane_peptide.py +88 -0
  21. MDAnalysisData/nhaa_equilibrium.py +89 -0
  22. MDAnalysisData/tests/__init__.py +3 -0
  23. MDAnalysisData/tests/conftest.py +5 -0
  24. MDAnalysisData/tests/test_base.py +144 -0
  25. MDAnalysisData/tests/test_datasets.py +179 -0
  26. MDAnalysisData/tests/test_package_metadata.py +31 -0
  27. MDAnalysisData/vesicles.py +111 -0
  28. MDAnalysisData/yiip_equilibrium.py +151 -0
  29. mdanalysisdata-0.9.1.dist-info/METADATA +137 -0
  30. mdanalysisdata-0.9.1.dist-info/RECORD +35 -0
  31. mdanalysisdata-0.9.1.dist-info/WHEEL +5 -0
  32. mdanalysisdata-0.9.1.dist-info/licenses/AUTHORS +24 -0
  33. mdanalysisdata-0.9.1.dist-info/licenses/LICENSE +30 -0
  34. mdanalysisdata-0.9.1.dist-info/top_level.txt +1 -0
  35. mdanalysisdata-0.9.1.dist-info/zip-safe +1 -0
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """Coarse-grained molecular dynamics of an amphiphilic fiber.
4
+
5
+ https://figshare.com/articles/126chains_dcd/7259915
6
+ """
7
+
8
+ from os.path import dirname, exists, join
9
+ from os import makedirs, remove
10
+
11
+ import logging
12
+
13
+ from .base import get_data_home
14
+ from .base import _fetch_remote, _read_description
15
+ from .base import RemoteFileMetadata
16
+ from .base import Bunch
17
+
18
NAME = "CG_fiber"
DESCRIPTION = "CG_fiber.rst"

# The original data can be found at the figshare URL.
# The SHA256 checksum of the zip file changes with every download, so the
# zip archive itself cannot be verified. Instead, the individual files are
# downloaded separately. The keys of this dict are also the keys of the
# Bunch that is returned.
ARCHIVE = dict(
    topology=RemoteFileMetadata(
        filename='126chains.psf',
        url='https://ndownloader.figshare.com/files/13374146',
        checksum='3ddb654b68549ac2ad5107a4282899f41fad233d09ea572446031711af4e57da',
    ),
    trajectory=RemoteFileMetadata(
        filename='126chains.dcd',
        url='https://ndownloader.figshare.com/files/13375838',
        checksum='e0b47d422f31ec209ea810edcf6cf3830da04bb2e1540f520477c27f4433d849',
    ),
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
39
+
40
+
41
def fetch_CG_fiber(data_home=None, download_if_missing=True):
    """Load the CG fiber self-assembly trajectory

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all MDAnalysisData data is stored in '~/MDAnalysis_data' subfolders.
        This dataset is stored in ``<data_home>/CG_fiber``.
    download_if_missing : optional, default=True
        If ``False``, raise a :exc:`IOError` if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    dataset : dict-like object with the following attributes:
        dataset.topology : filename
            Filename of the topology file
        dataset.trajectory : filename
            Filename of the trajectory file
        dataset.DESCR : string
            Description of the trajectory.

    Raises
    ------
    IOError
        If a file is not available locally and `download_if_missing` is
        ``False``.


    See :ref:`CG_fiber-dataset` for description.
    """
    data_location = join(get_data_home(data_home=data_home), NAME)
    # exist_ok=True replaces the former "check, then create" sequence, which
    # could fail if another process created the directory in between.
    makedirs(data_location, exist_ok=True)

    records = Bunch()
    for file_type, meta in ARCHIVE.items():
        local_path = join(data_location, meta.filename)
        # The expected path is recorded even before the file is known to exist.
        records[file_type] = local_path

        if not exists(local_path):
            if not download_if_missing:
                raise IOError("Data {0}={1} not found and `download_if_missing` is "
                              "False".format(file_type, local_path))
            logger.info("Downloading {0}: {1} -> {2}...".format(
                file_type, meta.url, local_path))
            # The return value is not needed: the target path is local_path.
            _fetch_remote(meta, dirname=data_location)

    records.DESCR = _read_description(DESCRIPTION)

    return records
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """Molecular dynamics trajectory of a single PEG chain in TIP3P water.
4
+
5
+ https://doi.org/10.6084/m9.figshare.7325774
6
+ """
7
+
8
+ from os.path import dirname, exists, join
9
+ from os import makedirs, remove
10
+
11
+ import logging
12
+
13
+ from .base import get_data_home
14
+ from .base import _fetch_remote, _read_description
15
+ from .base import RemoteFileMetadata
16
+ from .base import Bunch
17
+
18
NAME = "PEG_1chain"
DESCRIPTION = "PEG_1chain.rst"

# The original data can be found at the figshare URL.
# The SHA256 checksum of the zip file changes with every download, so the
# zip archive itself cannot be verified. Instead, the individual files are
# downloaded separately. The keys of this dict are also the keys of the
# Bunch that is returned.
ARCHIVE = dict(
    topology=RemoteFileMetadata(
        filename='PEG.prmtop',
        url='https://ndownloader.figshare.com/files/13532462',
        checksum='2d7955b9a8cb6e008171e0c5a1c31e3e458246ea3ee7302281eafefafa7cede9',
    ),
    trajectory=RemoteFileMetadata(
        filename='PEG_03_prod.nc',
        url='https://ndownloader.figshare.com/files/13532465',
        checksum='b978714ec2f93d1cbe99564cb257959f0cb38872359aa745c8eba720a7d85225',
    ),
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
39
+
40
+
41
def fetch_PEG_1chain(data_home=None, download_if_missing=True):
    """Load the PEG polymer trajectory

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all MDAnalysisData data is stored in '~/MDAnalysis_data' subfolders.
        This dataset is stored in ``<data_home>/PEG_1chain``.
    download_if_missing : optional, default=True
        If ``False``, raise a :exc:`IOError` if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    dataset : dict-like object with the following attributes:
        dataset.topology : filename
            Filename of the topology file
        dataset.trajectory : filename
            Filename of the trajectory file
        dataset.DESCR : string
            Description of the trajectory.

    Raises
    ------
    IOError
        If a file is not available locally and `download_if_missing` is
        ``False``.


    See :ref:`PEG_1chain-dataset` for description.
    """
    data_location = join(get_data_home(data_home=data_home), NAME)
    # exist_ok=True replaces the former "check, then create" sequence, which
    # could fail if another process created the directory in between.
    makedirs(data_location, exist_ok=True)

    records = Bunch()
    for file_type, meta in ARCHIVE.items():
        local_path = join(data_location, meta.filename)
        # The expected path is recorded even before the file is known to exist.
        records[file_type] = local_path

        if not exists(local_path):
            if not download_if_missing:
                raise IOError("Data {0}={1} not found and `download_if_missing` is "
                              "False".format(file_type, local_path))
            logger.info("Downloading {0}: {1} -> {2}...".format(
                file_type, meta.url, local_path))
            # The return value is not needed: the target path is local_path.
            _fetch_remote(meta, dirname=data_location)

    records.DESCR = _read_description(DESCRIPTION)

    return records
@@ -0,0 +1,22 @@
1
+ # MDAnalysisData package
2
+ #
3
+ # Modelled after sklearn.datasets
4
+ # https://github.com/scikit-learn/scikit-learn/tree/0.20.X/sklearn/datasets
5
+
6
# Names exported by ``from MDAnalysisData import *``.
__all__ = ['datasets']

from . import datasets


# The version string is looked up from the metadata of the installed
# "MDAnalysisData" distribution rather than being hard-coded here.
from importlib.metadata import version
__version__ = version("MDAnalysisData")


try:
    from .authors import __authors__
except ImportError:
    # authors.py could not be imported: warn and fall back to a generic
    # author list so that __authors__ is always defined.
    import warnings
    warnings.warn('Could not find authors.py, __authors__ will be the '
                  'generic MDAnalysis team.')
    __authors__ = ["The MDAnalysis Development Team"]
    # Do not leave the helper import behind in the package namespace.
    del warnings
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """AdK equilibrium trajectory without water.
4
+
5
+ https://figshare.com/articles/Molecular_dynamics_trajectory_for_benchmarking_MDAnalysis/5108170/1
6
+ """
7
+
8
+ from os.path import dirname, exists, join
9
+ from os import makedirs, remove
10
+
11
+ import logging
12
+
13
+ from .base import get_data_home
14
+ from .base import _fetch_remote, _read_description
15
+ from .base import RemoteFileMetadata
16
+ from .base import Bunch
17
+
18
NAME = "adk_equilibrium"
DESCRIPTION = "adk_equilibrium.rst"

# The original data can be found at the figshare URL.
# The SHA256 checksum of the zip file changes with every download, so the
# zip archive itself cannot be verified. Instead, the individual files are
# downloaded separately. The keys of this dict are also the keys of the
# Bunch that is returned.
ARCHIVE = dict(
    topology=RemoteFileMetadata(
        filename='adk4AKE.psf',
        url='https://ndownloader.figshare.com/files/8672230',
        checksum='1aa947d58fb41b6805dc1e7be4dbe65c6a8f4690f0bd7fc2ae03e7bd437085f4',
    ),
    trajectory=RemoteFileMetadata(
        filename='1ake_007-nowater-core-dt240ps.dcd',
        url='https://ndownloader.figshare.com/files/8672074',
        checksum='598fcbcfcc425f6eafbe9997238320fcacc6a4613ecce061e1521732bab734bf',
    ),
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
39
+
40
+
41
def fetch_adk_equilibrium(data_home=None, download_if_missing=True):
    """Load the AdK 1us equilibrium trajectory (without water)

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all MDAnalysisData data is stored in '~/MDAnalysis_data' subfolders.
        This dataset is stored in ``<data_home>/adk_equilibrium``.
    download_if_missing : optional, default=True
        If ``False``, raise a :exc:`IOError` if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    dataset : dict-like object with the following attributes:
        dataset.topology : filename
            Filename of the topology file
        dataset.trajectory : filename
            Filename of the trajectory file
        dataset.DESCR : string
            Description of the trajectory.

    Raises
    ------
    IOError
        If a file is not available locally and `download_if_missing` is
        ``False``.


    See :ref:`adk-equilibrium-dataset` for description.
    """
    data_location = join(get_data_home(data_home=data_home), NAME)
    # exist_ok=True replaces the former "check, then create" sequence, which
    # could fail if another process created the directory in between.
    makedirs(data_location, exist_ok=True)

    records = Bunch()
    for file_type, meta in ARCHIVE.items():
        local_path = join(data_location, meta.filename)
        # The expected path is recorded even before the file is known to exist.
        records[file_type] = local_path

        if not exists(local_path):
            if not download_if_missing:
                raise IOError("Data {0}={1} not found and `download_if_missing` is "
                              "False".format(file_type, local_path))
            logger.info("Downloading {0}: {1} -> {2}...".format(
                file_type, meta.url, local_path))
            # The return value is not needed: the target path is local_path.
            _fetch_remote(meta, dirname=data_location)

    records.DESCR = _read_description(DESCRIPTION)

    return records
@@ -0,0 +1,196 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """Ensembles of AdK transitions.
4
+
5
+ https://figshare.com/articles/Simulated_trajectory_ensembles_for_the_closed-to-open_transition_of_adenylate_kinase_from_DIMS_MD_and_FRODA/7165306
6
+ """
7
+
8
+
9
+ from os.path import dirname, exists, join
10
+ from os import makedirs, remove
11
+ import tarfile
12
+ import glob
13
+
14
+ import logging
15
+
16
+ from .base import get_data_home
17
+ from .base import _fetch_remote, _read_description
18
+ from .base import RemoteFileMetadata
19
+ from .base import Bunch
20
+
21
+
22
# One entry per transition ensemble. Each entry names the dataset, its
# description file, the single tar.gz archive to download, and where the
# topology and trajectory files are found inside the unpacked archive.
METADATA = dict(
    DIMS=dict(
        NAME="adk_transitions_DIMS",
        DESCRIPTION="adk_transitions_DIMS.rst",
        ARCHIVE=dict(
            tarfile=RemoteFileMetadata(
                filename='DIMS.tar.gz',
                url='https://ndownloader.figshare.com/files/13182490',
                checksum='81dfd247da7084bc7f47889c098069978b61f8f8b4f7706841266d284bfd3b55',
            ),
        ),
        CONTENTS=dict(
            topology="DIMS/topologies/adk4ake.psf",
            # glob pattern, relative to the dataset directory
            trajectories="DIMS/trajectories/dims*_fit-core.dcd",
            N_trajectories=200,
        ),
    ),
    FRODA=dict(
        NAME="adk_transitions_FRODA",
        DESCRIPTION="adk_transitions_FRODA.rst",
        ARCHIVE=dict(
            tarfile=RemoteFileMetadata(
                filename='FRODA.tar.gz',
                url='https://ndownloader.figshare.com/files/13182493',
                checksum='fc2c90b9819fd07720e7effada033d4045663919ba7d2c8bd84f548dfbeee73c',
            ),
        ),
        CONTENTS=dict(
            topology="FRODA/topologies/1ake.pdb",
            # glob pattern, relative to the dataset directory
            trajectories="FRODA/trajectories/pathway*_fit-core.dcd",
            N_trajectories=200,
        ),
    ),
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
58
+
59
def fetch_adk_transitions_DIMS(data_home=None, download_if_missing=True):
    """Load the AdK DIMS transitions dataset

    Thin wrapper that passes the ``'DIMS'`` entry of :data:`METADATA` to
    :func:`_fetch_adk_transitions`.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all MDAnalysisData data is stored in '~/MDAnalysis_data' subfolders.
        This dataset is stored in ``<data_home>/adk_transitions_DIMS``.
    download_if_missing : optional, default=True
        If ``False``, raise a :exc:`IOError` if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    dataset : dict-like object with the following attributes:
        dataset.topology : filename
            Filename of the topology file
        dataset.trajectories : list
            list with filenames of the trajectory ensemble
        dataset.N_trajectories : int
            number of trajectories in the ensemble
        dataset.DESCR : string
            Description of the ensemble


    See :ref:`adk-transitions-DIMS-dataset` for description.
    """
    dims_metadata = METADATA['DIMS']
    dataset = _fetch_adk_transitions(
        dims_metadata,
        data_home=data_home,
        download_if_missing=download_if_missing,
    )
    return dataset
90
+
91
def fetch_adk_transitions_FRODA(data_home=None, download_if_missing=True):
    """Load the AdK FRODA transitions dataset

    Thin wrapper that passes the ``'FRODA'`` entry of :data:`METADATA` to
    :func:`_fetch_adk_transitions`.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all MDAnalysisData data is stored in '~/MDAnalysis_data' subfolders.
        This dataset is stored in ``<data_home>/adk_transitions_FRODA``.
    download_if_missing : optional, default=True
        If ``False``, raise a :exc:`IOError` if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    dataset : dict-like object with the following attributes:
        dataset.topology : filename
            Filename of the topology file
        dataset.trajectories : list
            list with filenames of the trajectory ensemble
        dataset.N_trajectories : int
            number of trajectories in the ensemble
        dataset.DESCR : string
            Description of the ensemble


    See :ref:`adk-transitions-FRODA-dataset` for description.
    """
    froda_metadata = METADATA['FRODA']
    dataset = _fetch_adk_transitions(
        froda_metadata,
        data_home=data_home,
        download_if_missing=download_if_missing,
    )
    return dataset
122
+
123
+
124
def _fetch_adk_transitions(metadata, data_home=None, download_if_missing=True):
    """Generic function to load the AdK transitions datasets

    Parameters
    ----------
    metadata : dict
        dictionary with `NAME`, `DESCRIPTION`, `ARCHIVE` and `CONTENTS`.
        `ARCHIVE` describes a tar.gz file with directories topologies and
        trajectories; `CONTENTS` gives the topology path, the glob pattern
        of the trajectories and the expected number of trajectories, all
        relative to the dataset directory.
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all MDAnalysisData data is stored in '~/MDAnalysis_data' subfolders.
    download_if_missing : optional, default=True
        If ``False``, raise a :exc:`IOError` if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    dataset : dict-like object with the following attributes:
        dataset.topology : filename
            Filename of the topology file
        dataset.trajectories : list
            list with filenames of the trajectory ensemble (sorted by name)
        dataset.N_trajectories : int
            number of trajectories in the ensemble
        dataset.DESCR : string
            Description of the ensemble

    Raises
    ------
    IOError
        If the tar file is not available locally and `download_if_missing`
        is ``False``.
    RuntimeError
        If the topology file is missing or the number of trajectory files
        differs from the expected number.

    Note
    ----
    Assumptions that are built in:
    - download a single tar.gz file
    - trajectories are given with a glob pattern
    - the archive is only unpacked right after it has been downloaded; an
      existing tar file is taken to mean that the data were already unpacked

    """
    data_location = join(get_data_home(data_home=data_home), metadata['NAME'])
    # exist_ok=True replaces the former "check, then create" sequence, which
    # could fail if another process created the directory in between.
    makedirs(data_location, exist_ok=True)

    records = Bunch()

    meta = metadata['ARCHIVE']['tarfile']
    local_path = join(data_location, meta.filename)

    if not exists(local_path):
        if not download_if_missing:
            # The original formatted an undefined name here (NameError);
            # "tarfile" is the ARCHIVE key of the file that is missing.
            raise IOError("Data {0}={1} not found and `download_if_missing` is "
                          "False".format("tarfile", local_path))
        logger.info("Downloading {0}: {1} -> {2}...".format(
            "tarfile", meta.url, local_path))
        archive_path = _fetch_remote(meta, dirname=data_location)

        logger.info("Unpacking {}...".format(archive_path))
        with tarfile.open(archive_path, 'r') as tar:
            if hasattr(tarfile, 'data_filter'):
                # Extraction filters are available: restrict unpacking to
                # plain data that stays inside data_location.
                tar.extractall(path=data_location, filter='data')
            else:
                # Older Python without extraction filters.
                tar.extractall(path=data_location)

    records.topology = join(data_location, metadata['CONTENTS']['topology'])
    if not exists(records.topology):
        # should not happen...
        raise RuntimeError("topology file {} is missing".format(records.topology))

    trajectory_pattern = join(data_location, metadata['CONTENTS']['trajectories'])
    # glob returns matches in arbitrary order; sort for a reproducible list.
    records.trajectories = sorted(glob.glob(trajectory_pattern))
    records.N_trajectories = metadata['CONTENTS']['N_trajectories']
    if len(records.trajectories) != records.N_trajectories:
        # should not happen...
        raise RuntimeError("trajectory files in {0} are incomplete: only {1} "
                           "but should be {2}.".format(
                               trajectory_pattern, len(records.trajectories),
                               records.N_trajectories))

    records.DESCR = _read_description(metadata['DESCRIPTION'])

    return records
@@ -0,0 +1,12 @@
1
+ #-*- coding:utf-8 -*-
2
+ # This file is generated from the AUTHORS file during the installation process.
3
+ # Do not edit it as your changes will be overwritten.
4
+
5
+ __authors__ = [
6
+ u"Irfan Alibay",
7
+ u"Oliver Beckstein",
8
+ u"Shujie Fan",
9
+ u"Richard J. Gowers",
10
+ u"Micaela Matta",
11
+ u"Lily Wang"
12
+ ]