napistu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +12 -0
- napistu/__main__.py +867 -0
- napistu/consensus.py +1557 -0
- napistu/constants.py +500 -0
- napistu/gcs/__init__.py +10 -0
- napistu/gcs/constants.py +69 -0
- napistu/gcs/downloads.py +180 -0
- napistu/identifiers.py +805 -0
- napistu/indices.py +227 -0
- napistu/ingestion/__init__.py +10 -0
- napistu/ingestion/bigg.py +146 -0
- napistu/ingestion/constants.py +296 -0
- napistu/ingestion/cpr_edgelist.py +106 -0
- napistu/ingestion/identifiers_etl.py +148 -0
- napistu/ingestion/obo.py +268 -0
- napistu/ingestion/psi_mi.py +276 -0
- napistu/ingestion/reactome.py +218 -0
- napistu/ingestion/sbml.py +621 -0
- napistu/ingestion/string.py +356 -0
- napistu/ingestion/trrust.py +285 -0
- napistu/ingestion/yeast.py +147 -0
- napistu/mechanism_matching.py +597 -0
- napistu/modify/__init__.py +10 -0
- napistu/modify/constants.py +86 -0
- napistu/modify/curation.py +628 -0
- napistu/modify/gaps.py +635 -0
- napistu/modify/pathwayannot.py +1381 -0
- napistu/modify/uncompartmentalize.py +264 -0
- napistu/network/__init__.py +10 -0
- napistu/network/constants.py +117 -0
- napistu/network/neighborhoods.py +1594 -0
- napistu/network/net_create.py +1647 -0
- napistu/network/net_utils.py +652 -0
- napistu/network/paths.py +500 -0
- napistu/network/precompute.py +221 -0
- napistu/rpy2/__init__.py +127 -0
- napistu/rpy2/callr.py +168 -0
- napistu/rpy2/constants.py +101 -0
- napistu/rpy2/netcontextr.py +464 -0
- napistu/rpy2/rids.py +697 -0
- napistu/sbml_dfs_core.py +2216 -0
- napistu/sbml_dfs_utils.py +304 -0
- napistu/source.py +394 -0
- napistu/utils.py +943 -0
- napistu-0.1.0.dist-info/METADATA +56 -0
- napistu-0.1.0.dist-info/RECORD +77 -0
- napistu-0.1.0.dist-info/WHEEL +5 -0
- napistu-0.1.0.dist-info/entry_points.txt +2 -0
- napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
- napistu-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/conftest.py +83 -0
- tests/test_consensus.py +255 -0
- tests/test_constants.py +20 -0
- tests/test_curation.py +134 -0
- tests/test_data/__init__.py +0 -0
- tests/test_edgelist.py +20 -0
- tests/test_gcs.py +23 -0
- tests/test_identifiers.py +151 -0
- tests/test_igraph.py +353 -0
- tests/test_indices.py +88 -0
- tests/test_mechanism_matching.py +126 -0
- tests/test_net_utils.py +66 -0
- tests/test_netcontextr.py +105 -0
- tests/test_obo.py +34 -0
- tests/test_pathwayannot.py +95 -0
- tests/test_precomputed_distances.py +222 -0
- tests/test_rpy2.py +61 -0
- tests/test_sbml.py +46 -0
- tests/test_sbml_dfs_create.py +307 -0
- tests/test_sbml_dfs_utils.py +22 -0
- tests/test_sbo.py +11 -0
- tests/test_set_coverage.py +50 -0
- tests/test_source.py +67 -0
- tests/test_uncompartmentalize.py +40 -0
- tests/test_utils.py +487 -0
- tests/utils.py +30 -0
tests/test_utils.py
ADDED
@@ -0,0 +1,487 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import gzip
|
4
|
+
import os
|
5
|
+
from datetime import datetime
|
6
|
+
from unittest.mock import Mock
|
7
|
+
from unittest.mock import patch
|
8
|
+
|
9
|
+
import numpy as np
|
10
|
+
import pandas as pd
|
11
|
+
import pytest
|
12
|
+
from napistu import utils
|
13
|
+
from fs.tarfs import TarFS
|
14
|
+
from fs.zipfs import ZipFS
|
15
|
+
from google.cloud import storage
|
16
|
+
from pytest import fixture
|
17
|
+
from testcontainers.core.container import DockerContainer
|
18
|
+
|
19
|
+
|
20
|
+
@fixture(scope="session")
def gcs_storage():
    """Session-scoped fake GCS server running inside a Docker container.

    Points the google-cloud-storage client at the emulator via the
    STORAGE_EMULATOR_HOST environment variable.
    """
    container = (
        DockerContainer("fsouza/fake-gcs-server:1.44")
        .with_bind_ports(4443, 4443)
        .with_command("-scheme http -backend memory")
    )
    with container as gcs:
        # the google.cloud.storage client honours this env var
        os.environ["STORAGE_EMULATOR_HOST"] = "http://0.0.0.0:4443"
        yield gcs
|
30
|
+
|
31
|
+
|
32
|
+
@fixture
def gcs_bucket_name(gcs_storage):
    """A unique bucket name derived from the current timestamp."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    return "testbucket-" + stamp
|
36
|
+
|
37
|
+
|
38
|
+
@fixture
def gcs_bucket(gcs_bucket_name):
    """A GCS bucket created for the test and force-deleted afterwards."""
    storage_client = storage.Client()
    storage_client.create_bucket(gcs_bucket_name)
    created_bucket = storage_client.bucket(gcs_bucket_name)
    yield created_bucket
    # teardown: remove the bucket and everything in it
    created_bucket.delete(force=True)
|
46
|
+
|
47
|
+
|
48
|
+
@fixture
def gcs_bucket_uri(gcs_bucket, gcs_bucket_name):
    """The gs:// URI of the test bucket."""
    return "gs://" + gcs_bucket_name
|
51
|
+
|
52
|
+
|
53
|
+
@fixture
def gcs_bucket_subdir_uri(gcs_bucket_uri):
    """URI of a (not yet existing) subdirectory inside the test bucket."""
    return gcs_bucket_uri + "/testdir"
|
56
|
+
|
57
|
+
|
58
|
+
@fixture
def tmp_new_subdir(tmp_path):
    """Path to a subdirectory of tmp_path that has not been created yet."""
    return tmp_path / "test_dir"
|
62
|
+
|
63
|
+
|
64
|
+
def create_blob(bucket, blob_name, content=b"test"):
    """Upload *content* to *bucket* under *blob_name* (a marker/test file)."""
    blob = bucket.blob(blob_name)
    blob.upload_from_string(content)
|
67
|
+
|
68
|
+
|
69
|
+
def test_get_source_base_and_path_gcs():
    """A gs:// URI splits into the bucket root and the blob path."""
    uri = "gs://cpr-ml-dev-us-east1/cpr/tests/test_data/pw_index.tsv"
    base, path = utils.get_source_base_and_path(uri)
    assert base == "gs://cpr-ml-dev-us-east1"
    assert path == "cpr/tests/test_data/pw_index.tsv"
|
75
|
+
|
76
|
+
|
77
|
+
def test_get_source_base_and_path_local():
    """An absolute local path splits into directory and file name."""
    base, path = utils.get_source_base_and_path("/test_data/bla/pw_index.tsv")
    assert base == "/test_data/bla"
    assert path == "pw_index.tsv"
|
83
|
+
|
84
|
+
|
85
|
+
def test_get_source_base_and_path_local_rel():
    """A relative local path splits into directory and file name."""
    base, path = utils.get_source_base_and_path("./test_data/bla/pw_index.tsv")
    assert base == "./test_data/bla"
    assert path == "pw_index.tsv"
|
91
|
+
|
92
|
+
|
93
|
+
def test_get_source_base_and_path_local_direct():
    """A bare file name yields an empty base."""
    base, path = utils.get_source_base_and_path("pw_index.tsv")
    assert base == ""
    assert path == "pw_index.tsv"
|
97
|
+
|
98
|
+
|
99
|
+
def test_initialize_dir_new(tmp_new_subdir):
    """initialize_dir creates a directory that does not exist yet."""
    utils.initialize_dir(tmp_new_subdir, overwrite=False)
    assert tmp_new_subdir.exists()
|
102
|
+
|
103
|
+
|
104
|
+
@pytest.mark.unix_only
def test_initialize_dir_new_gcs(gcs_bucket_uri):
    """initialize_dir creates a new 'directory' on a GCS bucket.

    Bug fix: the result of ``utils.path_exists(test_uri)`` was previously
    discarded, so the test could never fail even if the directory was not
    created — it is now asserted.
    """
    test_uri = f"{gcs_bucket_uri}/testdir"
    utils.initialize_dir(test_uri, overwrite=False)
    assert utils.path_exists(test_uri)
|
109
|
+
|
110
|
+
|
111
|
+
def test_initialize_dir_new_2_layers(tmp_new_subdir):
    """initialize_dir creates nested directories in one call."""
    nested = tmp_new_subdir / "test_dir_2"
    utils.initialize_dir(nested, overwrite=False)
    assert nested.exists()
|
115
|
+
|
116
|
+
|
117
|
+
@pytest.mark.unix_only
def test_initialize_dir_new_2_layers_gcs(gcs_bucket_uri):
    """initialize_dir creates nested 'directories' on a GCS bucket.

    Bug fix: the result of ``utils.path_exists(test_uri)`` was previously
    discarded, so the test could never fail even if the directories were not
    created — it is now asserted.
    """
    test_uri = f"{gcs_bucket_uri}/testdir/testdir2"
    utils.initialize_dir(test_uri, overwrite=False)
    assert utils.path_exists(test_uri)
|
122
|
+
|
123
|
+
|
124
|
+
def test_initialize_dir_existing(tmp_new_subdir):
    """initialize_dir refuses to clobber an existing dir unless overwrite=True."""
    tmp_new_subdir.mkdir()

    marker = tmp_new_subdir / "test_file"
    marker.touch()

    # without overwrite the existing directory must be left untouched
    with pytest.raises(FileExistsError):
        utils.initialize_dir(tmp_new_subdir, overwrite=False)
    assert marker.exists()

    # with overwrite the directory is recreated empty
    utils.initialize_dir(tmp_new_subdir, overwrite=True)
    assert marker.exists() is False
|
136
|
+
|
137
|
+
|
138
|
+
@pytest.mark.unix_only
def test_initialize_dir_existing_gcs(gcs_bucket, gcs_bucket_uri):
    """initialize_dir on GCS honours the overwrite flag."""
    # create a file inside the directory
    create_blob(gcs_bucket, "testdir/file")
    # This is a drawback of the current implementation - folders are only
    # recognized if they have a marker file.
    create_blob(gcs_bucket, "testdir/")

    dir_uri = f"{gcs_bucket_uri}/testdir"
    file_uri = f"{dir_uri}/file"

    # refusing to overwrite leaves the contained file intact
    with pytest.raises(FileExistsError):
        utils.initialize_dir(dir_uri, overwrite=False)
    assert utils.path_exists(file_uri)

    # overwriting wipes the directory contents
    utils.initialize_dir(dir_uri, overwrite=True)
    assert utils.path_exists(file_uri) is False
|
154
|
+
|
155
|
+
|
156
|
+
def mock_targ_gz(url, tmp_file):
    """Download stand-in: write a tar archive containing test.txt."""
    with TarFS(tmp_file, write=True) as archive:
        with archive.open("test.txt", "w") as handle:
            handle.write("test")
|
160
|
+
|
161
|
+
|
162
|
+
def mock_zip(url, tmp_file):
    """Download stand-in: write a zip archive containing test.txt."""
    with ZipFS(tmp_file, write=True) as archive:
        with archive.open("test.txt", "w") as handle:
            handle.write("test")
|
166
|
+
|
167
|
+
|
168
|
+
def mock_gz(url, tmp_file):
    """Download stand-in: write a gzip file whose text content is 'test'."""
    with gzip.open(tmp_file, mode="wt") as handle:
        handle.write("test")
|
171
|
+
|
172
|
+
|
173
|
+
@patch("napistu.utils.download_wget", side_effect=mock_targ_gz)
|
174
|
+
def test_download_and_extract_tar_gz(mock_download, tmp_new_subdir):
|
175
|
+
utils.download_and_extract(
|
176
|
+
url="http://asdf/bla.tar.gz",
|
177
|
+
output_dir_path=tmp_new_subdir,
|
178
|
+
download_method="wget",
|
179
|
+
)
|
180
|
+
assert (tmp_new_subdir / "test.txt").exists()
|
181
|
+
|
182
|
+
|
183
|
+
@patch("napistu.utils.download_ftp", side_effect=mock_zip)
|
184
|
+
def test_download_and_extract_zip(mock_download, tmp_new_subdir):
|
185
|
+
utils.download_and_extract(
|
186
|
+
url="http://asdf/bla.txt.zip",
|
187
|
+
output_dir_path=tmp_new_subdir,
|
188
|
+
download_method="ftp",
|
189
|
+
)
|
190
|
+
assert (tmp_new_subdir / "test.txt").exists()
|
191
|
+
|
192
|
+
|
193
|
+
@patch("napistu.utils.download_wget", side_effect=mock_gz)
|
194
|
+
def test_download_and_extract_gz(mock_download, tmp_new_subdir):
|
195
|
+
utils.download_and_extract(
|
196
|
+
url="http://asdf/bla.txt.gz",
|
197
|
+
output_dir_path=tmp_new_subdir,
|
198
|
+
download_method="wget",
|
199
|
+
)
|
200
|
+
assert (tmp_new_subdir / "bla.txt").exists()
|
201
|
+
|
202
|
+
|
203
|
+
def test_download_and_extract_invalid_method(tmp_new_subdir):
    """An unknown download_method raises ValueError."""
    with pytest.raises(ValueError):
        utils.download_and_extract(
            url="http://asdf/bla.txt.zip",
            output_dir_path=tmp_new_subdir,
            download_method="bla",
        )
|
210
|
+
|
211
|
+
|
212
|
+
@patch("napistu.utils.download_ftp", side_effect=mock_zip)
|
213
|
+
def test_download_and_extract_invalid_ext(mock_download, tmp_new_subdir):
|
214
|
+
with pytest.raises(ValueError):
|
215
|
+
utils.download_and_extract(
|
216
|
+
url="http://asdf/bla.txt.zipper",
|
217
|
+
output_dir_path=tmp_new_subdir,
|
218
|
+
download_method="ftp",
|
219
|
+
)
|
220
|
+
|
221
|
+
|
222
|
+
def test_path_exists(tmp_path, tmp_new_subdir):
    """path_exists recognises files, directories and the cwd."""
    assert utils.path_exists(tmp_path)
    assert utils.path_exists(tmp_new_subdir) is False

    target = tmp_path / "test.txt"
    assert utils.path_exists(target) is False
    target.touch()
    assert utils.path_exists(target)

    assert utils.path_exists(".")

    tmp_new_subdir.mkdir()
    assert utils.path_exists(tmp_new_subdir)
|
232
|
+
|
233
|
+
|
234
|
+
@pytest.mark.unix_only
def test_path_exists_gcs(gcs_bucket, gcs_bucket_uri):
    """path_exists on GCS recognises buckets, marker 'directories' and blobs."""
    assert utils.path_exists(gcs_bucket_uri)

    test_dir = "testdir"
    dir_uri = f"{gcs_bucket_uri}/{test_dir}"
    assert utils.path_exists(dir_uri) is False
    # Create the marker file for the directory, such that it 'exists'
    create_blob(gcs_bucket, f"{test_dir}/")
    assert utils.path_exists(dir_uri)

    # Test if files exists
    test_file = f"{test_dir}/test.txt"
    file_uri = f"{gcs_bucket_uri}/{test_file}"
    assert utils.path_exists(file_uri) is False
    # create the file
    create_blob(gcs_bucket, test_file)
    assert utils.path_exists(file_uri)
|
251
|
+
|
252
|
+
|
253
|
+
@pytest.mark.unix_only
def test_save_load_pickle_existing_folder(tmp_path):
    """Pickle round-trip into an already-existing directory."""
    target = tmp_path / "test.pkl"
    data = "test"
    utils.save_pickle(target, data)
    assert target.exists()
    assert utils.load_pickle(target) == data
|
260
|
+
|
261
|
+
|
262
|
+
@pytest.mark.skip_on_windows
def test_save_load_pickle_new_folder(tmp_new_subdir):
    """Pickle round-trip into a directory that save_pickle must create.

    NOTE(review): this test uses the skip_on_windows marker while its sibling
    uses unix_only — presumably equivalent; confirm the intended marker.
    """
    target = tmp_new_subdir / "test.pkl"
    data = "test"
    utils.save_pickle(target, data)
    assert target.exists()
    assert utils.load_pickle(target) == data
|
269
|
+
|
270
|
+
|
271
|
+
@pytest.mark.unix_only
def test_save_load_pickle_existing_folder_gcs(gcs_bucket_uri):
    """Pickle round-trip against the bucket root on GCS."""
    target = f"{gcs_bucket_uri}/test.pkl"
    data = "test"
    utils.save_pickle(target, data)
    assert utils.path_exists(target)
    assert utils.load_pickle(target) == data
|
278
|
+
|
279
|
+
|
280
|
+
@pytest.mark.unix_only
def test_save_load_pickle_new_folder_gcs(gcs_bucket_subdir_uri):
    """Pickle round-trip into a not-yet-existing GCS 'directory'."""
    target = f"{gcs_bucket_subdir_uri}/test.pkl"
    data = "test"
    utils.save_pickle(target, data)
    assert utils.path_exists(target)
    assert utils.load_pickle(target) == data
|
287
|
+
|
288
|
+
|
289
|
+
@pytest.mark.skip_on_windows
def test_copy_uri_file(tmp_path, tmp_new_subdir):
    """copy_uri copies a single file, creating the destination directory."""
    source = tmp_path / "test.txt"
    source.write_text("test")
    destination = tmp_new_subdir / "test_out.txt"
    utils.copy_uri(source, destination)
    assert destination.read_text() == "test"
|
297
|
+
|
298
|
+
|
299
|
+
@pytest.mark.skip_on_windows
def test_copy_uri_fol(tmp_path, tmp_new_subdir):
    """copy_uri with is_file=False copies a whole directory."""
    tmp_new_subdir.mkdir()
    (tmp_new_subdir / "test").touch()

    destination_dir = tmp_path / "out"
    utils.copy_uri(tmp_new_subdir, destination_dir, is_file=False)
    assert (destination_dir / "test").exists()
|
308
|
+
|
309
|
+
|
310
|
+
@pytest.mark.unix_only
def test_copy_uri_file_gcs(gcs_bucket_uri, gcs_bucket_subdir_uri):
    """copy_uri copies a single blob between GCS locations."""
    basename = "test.txt"
    content = "test"
    source = f"{gcs_bucket_uri}/{basename}"
    utils.save_pickle(source, content)

    destination = f"{gcs_bucket_subdir_uri}/{basename}"
    utils.copy_uri(source, destination)
    assert utils.path_exists(destination)
    assert utils.load_pickle(destination) == content
|
320
|
+
|
321
|
+
|
322
|
+
@pytest.mark.unix_only
def test_copy_uri_fol_gcs(gcs_bucket_uri, gcs_bucket_subdir_uri):
    """copy_uri with is_file=False copies a GCS 'directory' recursively."""
    basename = "test.txt"
    content = "test"
    source = f"{gcs_bucket_subdir_uri}/{basename}"
    utils.save_pickle(source, content)

    destination_dir = f"{gcs_bucket_uri}/new_dir"
    utils.copy_uri(gcs_bucket_subdir_uri, destination_dir, is_file=False)
    assert utils.path_exists(f"{destination_dir}/{basename}")
|
332
|
+
|
333
|
+
|
334
|
+
@pytest.mark.skip_on_windows
def test_pickle_cache(tmp_path):
    """pickle_cache memoises the wrapped function's result on disk."""
    cache_file = tmp_path / "test.pkl"

    call_counter = Mock()
    expected = "test"

    @utils.pickle_cache(cache_file)
    def cached_func():
        call_counter()
        return expected

    cached_func()
    assert cached_func() == expected
    # only called once as second
    # call should be cached
    assert call_counter.call_count == 1
|
352
|
+
|
353
|
+
|
354
|
+
def test_extract_regex():
    """extract_regex_search / extract_regex_match pull substrings or raise."""
    # search: whole match by default, or a capture group by index
    assert utils.extract_regex_search("ENS[GT][0-9]+", "ENST0005") == "ENST0005"
    assert utils.extract_regex_search("ENS[GT]([0-9]+)", "ENST0005", 1) == "0005"
    with pytest.raises(ValueError):
        utils.extract_regex_search("ENS[GT][0-9]+", "ENSA0005")

    # match: always returns the first capture group
    assert utils.extract_regex_match(".*type=([a-zA-Z]+).*", "Ltype=abcd5") == "abcd"
    # use for formatting identifiers
    assert utils.extract_regex_match("^([a-zA-Z]+)_id$", "sc_id") == "sc"
    with pytest.raises(ValueError):
        utils.extract_regex_match(".*type=[a-zA-Z]+.*", "Ltype=abcd5")
|
365
|
+
|
366
|
+
|
367
|
+
def test_match_pd_vars():
    """match_pd_vars reports whether required variables are present."""
    series = pd.Series({"foo": 1, "bar": 2})
    frame = pd.DataFrame({"foo": ["a", "b"], "bar": [1, 2]})

    # both Series and DataFrame inputs are supported
    for obj in (series, frame):
        assert utils.match_pd_vars(obj, {"foo", "bar"}).are_present
        assert not utils.match_pd_vars(obj, {"baz"}).are_present
|
375
|
+
|
376
|
+
|
377
|
+
def test_ensure_pd_df():
    """ensure_pd_df passes DataFrames through and promotes Series to frames."""
    source_df = pd.DataFrame({"a": "b"}, index=[0])
    source_series = pd.Series({"a": "b"}).rename(0)

    promoted = utils.ensure_pd_df(source_series)

    assert isinstance(utils.ensure_pd_df(source_df), pd.DataFrame)
    assert isinstance(promoted, pd.DataFrame)
    # the promoted series matches the reference frame exactly
    assert all(promoted.index == source_df.index)
    assert all(promoted.columns == source_df.columns)
    assert all(promoted == source_df)
|
388
|
+
|
389
|
+
|
390
|
+
def test_format_identifiers_as_edgelist():
    """format_identifiers_as_edgelist prefixes index and identifier columns."""
    # degenerate identifiers keyed by a two-level index
    two_level_df = pd.DataFrame(
        {
            "ind1": [0, 0, 1, 1, 1, 1],
            "ind2": ["a", "a", "b", "b", "c", "d"],
            "ont": ["X", "X", "X", "Y", "Y", "Y"],
            "val": ["A", "B", "C", "D", "D", "E"],
        }
    ).set_index(["ind1", "ind2"])

    # the same identifiers keyed by a single-level index
    one_level_df = pd.DataFrame(
        {
            "ind": ["a", "a", "b", "b", "c", "d"],
            "ont": ["X", "X", "X", "Y", "Y", "Y"],
            "val": ["A", "B", "C", "D", "D", "E"],
        }
    ).set_index("ind")

    result = utils.format_identifiers_as_edgelist(two_level_df, ["ont", "val"])
    assert result["ind"].iloc[0] == "ind_0_a"
    assert result["id"].iloc[0] == "id_X_A"

    result = utils.format_identifiers_as_edgelist(two_level_df, ["val"])
    assert result["ind"].iloc[0] == "ind_0_a"
    assert result["id"].iloc[0] == "id_A"

    result = utils.format_identifiers_as_edgelist(one_level_df, ["ont", "val"])
    assert result["ind"].iloc[0] == "ind_a"
    assert result["id"].iloc[0] == "id_X_A"

    # an unnamed index cannot be formatted
    with pytest.raises(ValueError):
        utils.format_identifiers_as_edgelist(
            one_level_df.reset_index(drop=True), ["ont", "val"]
        )
|
428
|
+
|
429
|
+
|
430
|
+
def test_find_weakly_connected_subgraphs():
    """Entries sharing identifiers end up in the same cluster."""
    degenerate_df = pd.DataFrame(
        {
            "ind": ["a", "a", "b", "b", "c", "d"],
            "ont": ["X", "X", "X", "Y", "Y", "Y"],
            "val": ["A", "B", "C", "D", "D", "E"],
        }
    ).set_index("ind")

    edgelist_df = utils.format_identifiers_as_edgelist(
        degenerate_df, ["ont", "val"]
    )
    edgelist = edgelist_df[["ind", "id"]]

    clusters = utils.find_weakly_connected_subgraphs(edgelist[["ind", "id"]])
    # b and c share identifier D and thus a cluster; a and d are singletons
    assert all(clusters["cluster"] == [0, 1, 1, 2])
|
446
|
+
|
447
|
+
|
448
|
+
def test_style_df():
    """style_df returns a pandas Styler for simple and multi-indexed frames.

    Bug fix: the ``isinstance(...)`` results were previously discarded, so
    the four checks were no-ops that could never fail — they are now
    asserted.
    """
    np.random.seed(0)
    simple_df = pd.DataFrame(np.random.randn(20, 4), columns=["A", "B", "C", "D"])
    simple_df.index.name = "foo"

    multiindexed_df = (
        pd.DataFrame(
            {
                "category": ["foo", "foo", "foo", "bar", "bar", "bar"],
                "severity": ["major", "minor", "minor", "major", "major", "minor"],
            }
        )
        .assign(message="stuff")
        .groupby(["category", "severity"])
        .count()
    )

    # style a few pd.DataFrames
    assert isinstance(utils.style_df(simple_df), pd.io.formats.style.Styler)
    assert isinstance(
        utils.style_df(simple_df, headers=None, hide_index=True),
        pd.io.formats.style.Styler,
    )
    assert isinstance(
        utils.style_df(simple_df, headers=["a", "b", "c", "d"], hide_index=True),
        pd.io.formats.style.Styler,
    )
    assert isinstance(utils.style_df(multiindexed_df), pd.io.formats.style.Styler)
|
476
|
+
|
477
|
+
|
478
|
+
def test_score_nameness():
    """score_nameness scores how identifier-like vs name-like a string is."""
    expected_scores = {
        "p53": 23,
        "ENSG0000001": 56,
        "pyruvate kinase": 15,
    }
    for name, score in expected_scores.items():
        assert utils.score_nameness(name) == score
|
482
|
+
|
483
|
+
|
484
|
+
def test_click_str_to_list():
    """click_str_to_list parses a loosely-quoted list literal or raises."""
    parsed = utils.click_str_to_list("['foo', bar]")
    assert parsed == ["foo", "bar"]
    with pytest.raises(ValueError):
        utils.click_str_to_list("foo")
|
tests/utils.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
import shutil
|
5
|
+
|
6
|
+
from napistu import indices
|
7
|
+
|
8
|
+
|
9
|
+
def test_setup(test_data_path="/home/sean/cpr/lib/python/cpr/tests/test_data"):
    """Regenerate the SBML test fixtures from a local Reactome export.

    NOTE(review): despite the ``test_`` prefix this is a one-off data
    preparation helper, not a pytest test; both the default
    ``test_data_path`` and ``cpr_assets_root`` are machine-specific paths —
    pass/adjust them when running elsewhere.

    Parameters
    ----------
    test_data_path : str
        Directory the filtered pw_index.tsv and its SBML files are written to.
    """
    cpr_assets_root = "/group/cpr"

    # setup sbmls

    pw_index = indices.PWIndex(
        os.path.join(cpr_assets_root, "reactome/sbml/pw_index.tsv")
    )
    pw_index.filter(species="Homo sapiens")
    pw_index.search("carbon")

    # add pw_index
    pw_index.index.to_csv(os.path.join(test_data_path, "pw_index.tsv"), sep="\t")

    # copy every sbml referenced by the index into the test data directory.
    # (Idiom fix: this was a list comprehension used purely for its side
    # effects; a plain loop states the intent.)
    for f in pw_index.index["file"].tolist():
        shutil.copyfile(
            os.path.join(cpr_assets_root, "reactome", "sbml", f),
            os.path.join(test_data_path, f),
        )
|