napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
napistu/network/precompute.py ADDED
@@ -0,0 +1,221 @@
+ from __future__ import annotations
+
+ import logging
+ import math
+
+ import igraph as ig
+ import numpy as np
+ import pandas as pd
+
+ from napistu.network import net_utils
+
+ logger = logging.getLogger(__name__)
+
+
+ def precompute_distances(
+     cpr_graph: ig.Graph,
+     max_steps: int = -1,
+     max_score_q: float = 1.0,
+     partition_size: int = 5000,
+     weights_vars: list[str] = ["weights", "upstream_weights"],
+ ) -> pd.DataFrame:
+     """
+     Pre-Compute Distances
+
+     Parameters
+     ----------
+     cpr_graph: ig.Graph
+         An igraph network model
+     max_steps: int
+         The maximum number of steps between pairs of species for which to save a distance
+     max_score_q: float
+         Retain up to the "max_score_q" quantile of all scores (smaller scores are better)
+     partition_size: int
+         The number of species to process together when computing distances. Decreasing this
+         value will lower the overall memory footprint of the distance calculation.
+     weights_vars: list
+         One or more variables defining edge weights to use when calculating weighted
+         shortest paths. Shortest paths will be calculated separately with each type of
+         weights and used to construct path weights named according to 'path_{weight_var}'
+
+     Returns
+     -------
+     A pd.DataFrame containing:
+     - sc_id_origin: origin node
+     - sc_id_dest: destination node
+     - path_length: minimum path length between origin and destination
+     - path_weight*: minimum path weight between origin and destination, formed by summing
+       the weights of individual edges (one variable will exist for each weight
+       specified in 'weights_vars')
+     """
+
+     if max_steps == -1:
+         max_steps = int(100000)
+
+     # validate inputs
+     if max_steps < 1:
+         raise ValueError(f"max_steps must be >= 1, but was {max_steps}")
+
+     if (max_score_q < 0) or (max_score_q > 1):
+         raise ValueError(f"max_score_q must be between 0 and 1 but was {max_score_q}")
+
+     # make sure the weight variables exist as edge attributes
+     net_utils._validate_edge_attributes(cpr_graph, weights_vars)
+
+     # assign molecular species to partitions
+     vs_to_partition = pd.DataFrame(
+         {"sc_id": cpr_graph.vs["name"], "node_type": cpr_graph.vs["node_type"]}
+     ).query("node_type == 'species'")
+
+     n_partitions = math.ceil(vs_to_partition.shape[0] / partition_size)
+
+     vs_to_partition["partition"] = vs_to_partition.index % n_partitions
+     vs_to_partition = vs_to_partition.set_index("partition").sort_index()
+
+     # iterate through all partitions of "from" nodes and find their shortest and lowest-weight paths
+     unique_partitions = vs_to_partition.index.unique().tolist()
+
+     precomputed_distances = (
+         pd.concat(
+             [
+                 _calculate_distances_subset(
+                     cpr_graph,
+                     vs_to_partition,
+                     vs_to_partition.loc[uq_part],
+                     weights_vars=weights_vars,
+                 )
+                 for uq_part in unique_partitions
+             ]
+         )
+         .reset_index(drop=True)
+         .query("sc_id_origin != sc_id_dest")
+     )
+
+     # filter by path length and/or weight
+     filtered_precomputed_distances = _filter_precomputed_distances(
+         precomputed_distances=precomputed_distances,
+         max_steps=max_steps,
+         max_score_q=max_score_q,
+         path_weights_vars=["path_" + w for w in weights_vars],
+     )
+
+     return filtered_precomputed_distances
+
+
+ def _calculate_distances_subset(
+     cpr_graph: ig.Graph,
+     vs_to_partition: pd.DataFrame,
+     one_partition: pd.DataFrame,
+     weights_vars: list[str] = ["weights", "upstream_weights"],
+ ) -> pd.DataFrame:
+     """Calculate distances from a subset of vertices to all vertices."""
+
+     d_steps = (
+         pd.DataFrame(
+             np.array(
+                 cpr_graph.distances(
+                     source=one_partition["sc_id"], target=vs_to_partition["sc_id"]
+                 )
+             ),
+             index=one_partition["sc_id"].rename("sc_id_origin"),
+             columns=vs_to_partition["sc_id"].rename("sc_id_dest"),
+         )
+         .reset_index()
+         .melt("sc_id_origin", value_name="path_length")
+         .replace([np.inf, -np.inf], np.nan)
+         .dropna()
+     )
+
+     d_weights_list = list()
+     for weight_type in weights_vars:
+         d_weights_subset = (
+             pd.DataFrame(
+                 np.array(
+                     cpr_graph.distances(
+                         source=one_partition["sc_id"],
+                         target=vs_to_partition["sc_id"],
+                         weights=weight_type,
+                     )
+                 ),
+                 index=one_partition["sc_id"].rename("sc_id_origin"),
+                 columns=vs_to_partition["sc_id"].rename("sc_id_dest"),
+             )
+             .reset_index()
+             .melt("sc_id_origin", value_name=f"path_{weight_type}")
+             .replace([np.inf, -np.inf], np.nan)
+             .dropna()
+         )
+
+         d_weights_list.append(d_weights_subset)
+
+     d_weights = d_weights_list.pop()
+     while len(d_weights_list) > 0:
+         d_weights = d_weights.merge(d_weights_list.pop())
+
+     # merge shortest path distances by length and by weight
+     # note: these may be different paths! e.g., a longer path may have a lower weight than a short one
+     path_summaries = d_steps.merge(
+         d_weights,
+         on=["sc_id_origin", "sc_id_dest"],
+     )
+
+     # return connected species
+     return path_summaries
+
+
+ def _filter_precomputed_distances(
+     precomputed_distances: pd.DataFrame,
+     max_steps: float | int = np.inf,
+     max_score_q: float = 1,
+     path_weights_vars: list[str] = ["path_weights", "path_upstream_weights"],
+ ) -> pd.DataFrame:
+     """Filter precomputed distances by maximum steps and/or to low scores by quantile."""
+
+     # filter by path lengths
+     # (copy so that the quantile-based masking below does not modify the input frame)
+     short_precomputed_distances = precomputed_distances[
+         precomputed_distances["path_length"] <= max_steps
+     ].copy()
+     n_filtered_by_path_length = (
+         precomputed_distances.shape[0] - short_precomputed_distances.shape[0]
+     )
+     if n_filtered_by_path_length > 0:
+         logger.info(
+             f"filtered {n_filtered_by_path_length} possible paths with length > {max_steps}"
+         )
+
+     # filter by path weights
+     for wt_var in path_weights_vars:
+         score_q_cutoff = np.quantile(short_precomputed_distances[wt_var], max_score_q)
+
+         short_precomputed_distances.loc[
+             short_precomputed_distances[wt_var] > score_q_cutoff, wt_var
+         ] = np.nan
+
+     valid_weights = short_precomputed_distances[path_weights_vars].dropna(how="all")
+
+     low_weight_precomputed_distances = short_precomputed_distances[
+         short_precomputed_distances.index.isin(valid_weights.index.tolist())
+     ]
+
+     n_filtered_by_low_weight = (
+         short_precomputed_distances.shape[0] - low_weight_precomputed_distances.shape[0]
+     )
+
+     if n_filtered_by_low_weight > 0:
+         logger.info(
+             f"filtered {n_filtered_by_low_weight} possible paths with path weights greater "
+             f"than the {max_score_q} quantile of the path weight distribution"
+         )
+
+     weight_nan_summary = valid_weights.isnull().sum()
+     if any(weight_nan_summary != 0):
+         nan_summary = " and ".join(
+             [
+                 f"{k} has {v} np.nan values"
+                 for k, v in weight_nan_summary.to_dict().items()
+             ]
+         )
+         logger.info(nan_summary)
+
+     return low_weight_precomputed_distances
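For orientation, here is a minimal sketch of calling precompute_distances on a toy graph. The graph below is hypothetical; it assumes the attributes shown satisfy net_utils._validate_edge_attributes, which the real CPR graphs produced by napistu.network.net_create would.

    import igraph as ig
    from napistu.network import precompute

    # toy four-node cycle standing in for a real CPR graph; vertices need
    # "name" and "node_type" attributes, edges need each weight variable
    g = ig.Graph.Ring(4)
    g.vs["name"] = ["A", "B", "C", "D"]
    g.vs["node_type"] = ["species"] * 4
    g.es["weights"] = [1.0, 2.0, 1.0, 3.0]
    g.es["upstream_weights"] = [1.0, 1.0, 2.0, 1.0]

    # keep paths of at most 3 steps and the best 90% of path weights
    distances = precompute.precompute_distances(g, max_steps=3, max_score_q=0.9)
    # expected columns: sc_id_origin, sc_id_dest, path_length,
    # path_weights, path_upstream_weights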
napistu/rpy2/__init__.py ADDED
@@ -0,0 +1,127 @@
+ from __future__ import annotations
+
+ import functools
+ import logging
+ import os
+ import sys
+
+ logger = logging.getLogger(__name__)
+
+ try:
+     import rpy2  # noqa
+
+     has_rpy2 = True
+
+     from rpy2.robjects import conversion, default_converter  # noqa
+     from rpy2.robjects.packages import importr  # noqa
+
+ except ImportError:
+     has_rpy2 = False
+     logger.warning(
+         "rpy2 is not installed. "
+         "Some functions will not work. "
+         "Consider installing `cpr[rpy2]`."
+     )
+ except Exception as e:
+     has_rpy2 = False
+     logger.warning(
+         f"rpy2 initialization failed with an unrecognized exception: {e}"
+     )
+
+
+ def warn_if_no_rpy2(func):
+     @functools.wraps(func)
+     def warn_if_no_rpy2_wrapper(*args, **kwargs):
+         if not has_rpy2:
+             raise ImportError(
+                 "This function requires `rpy2`.\n"
+                 "Please install `cpr` with the `rpy2` extra dependencies.\n"
+                 "For example: `pip install cpr[rpy2]`\n"
+             )
+         return func(*args, **kwargs)
+
+     return warn_if_no_rpy2_wrapper
+
+
+ def rsession_info() -> None:
+     """Report summaries of the R installation found by rpy2.
+
+     The default converters bundled with rpy2 are used for this step rather
+     than those bundled with rpy2_arrow, because rpy2_arrow requires the
+     arrow R package and can be difficult to import without a valid R setup.
+     """
+
+     with conversion.localconverter(default_converter):
+         base = importr("base")
+         utils = importr("utils")
+
+         lib_paths = base._libPaths()
+         session_info = utils.sessionInfo()
+
+         logger.warning(
+             "An exception occurred when running some rpy2-related functionality\n"
+             "Here is a summary of your R session\n"
+             f"Using R version in {base.R_home()[0]}\n"
+             ".libPaths ="
+         )
+         logger.warning("\n".join(lib_paths))
+         logger.warning(f"sessionInfo = {session_info}")
+         # suggest a fix (the helper logs its own warnings)
+         _r_home_warning()
+
+     return None
+
+
+ def _r_home_warning() -> None:
+     """Suggest installation directions for R, as part of rsession_info."""
+
+     is_conda = os.path.exists(os.path.join(sys.prefix, "conda-meta"))
+     if is_conda:
+         r_lib_path = os.path.join(sys.prefix, "lib", "R")
+         if os.path.isdir(r_lib_path):
+             logger.warning(
+                 "You seem to be working in a conda environment with R installed.\n"
+                 "If this version was not located by rpy2 then try to set R_HOME using:\n"
+                 f"os.environ['R_HOME'] = {r_lib_path}"
+             )
+         else:
+             logger.warning(
+                 "You seem to be working in a conda environment but R is NOT installed.\n"
+                 "If this is the case then install R, the CPR R package and the R arrow package into your\n"
+                 "conda environment and then set the R_HOME environment variable using:\n"
+                 "os.environ['R_HOME'] = <<PATH_TO_R_lib/R>>"
+             )
+     else:
+         logger.warning(
+             "If you don't have R installed or if your desired R library does not match the\n"
+             "one above, then set your R_HOME environment variable using:\n"
+             "os.environ['R_HOME'] = <<PATH_TO_lib/R>>"
+         )
+
+     return None
+
+
+ def report_r_exceptions(function):
+     @functools.wraps(function)
+     def report_r_exceptions_wrapper(*args, **kwargs):
+         if not has_rpy2:
+             raise ImportError(
+                 "This function requires `rpy2`.\n"
+                 "Please install `cpr` with the `rpy2` extra dependencies.\n"
+                 "For example: `pip install cpr[rpy2]`\n"
+             )
+         try:
+             return function(*args, **kwargs)
+         except Exception:
+             # log the exception
+             logger.warning(f"There was an exception in {function.__name__}")
+             # report session info
+             rsession_info()
+             # re-raise the exception
+             raise
+
+     return report_r_exceptions_wrapper
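Note that, despite its name, warn_if_no_rpy2 raises an ImportError at call time rather than emitting a warning. A minimal sketch of how the two decorators compose around a hypothetical R-touching function:

    from napistu.rpy2 import report_r_exceptions, warn_if_no_rpy2

    @warn_if_no_rpy2
    @report_r_exceptions
    def describe_r() -> None:
        # hypothetical helper: only runs when rpy2 imported successfully, and
        # any R-side failure triggers rsession_info() before re-raising
        from rpy2.robjects.packages import importr

        base = importr("base")
        print(base.R_home()[0])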
napistu/rpy2/callr.py ADDED
@@ -0,0 +1,168 @@
+ from __future__ import annotations
+
+ import pandas as pd
+ from napistu.rpy2 import has_rpy2
+ from napistu.rpy2 import report_r_exceptions
+ from napistu.rpy2 import rsession_info
+ from napistu.rpy2 import warn_if_no_rpy2
+
+ if has_rpy2:
+     from rpy2.robjects import pandas2ri
+     from rpy2.robjects.packages import importr
+     from rpy2.robjects.packages import InstalledSTPackage, InstalledPackage
+     import pyarrow
+
+     # loading rpy2_arrow checks whether the R arrow package is found;
+     # this is the first time a non-standard R package is loaded, so a
+     # bad R setup can cause issues at this stage.
+     # rsession_info() adds some helpful debugging information.
+     try:
+         import rpy2_arrow.arrow as pyra
+     except Exception:
+         rsession_info()
+         raise
+     import rpy2.robjects.conversion
+     import rpy2.rinterface
+     import rpy2.robjects as ro
+
+
+ @warn_if_no_rpy2
+ @report_r_exceptions
+ def get_rcpr(
+     r_paths: list[str] | None = None,
+ ):
+     """
+     Get rcpr
+
+     Gets the rcpr R package.
+
+     Args:
+         r_paths (list[str]):
+             Paths to add to .libPaths() in R
+
+     Returns:
+         The rcpr R package
+     """
+
+     _ = get_rbase(r_paths)
+
+     # connect to the rcpr R package
+     rcpr = importr("rcpr")
+     return rcpr
+
+
+ @warn_if_no_rpy2
+ @report_r_exceptions
+ def bioconductor_org_r_function(
+     object_type: str, species: str, r_paths: list[str] | None = None
+ ):
+     """
+     Bioconductor Organism R Function
+
+     Calls "bioconductor_org_function" from the rcpr R package to pull a mapping object
+     out of a species-specific library.
+
+     Parameters:
+         object_type (str):
+             Type of function to call
+         species (str):
+             Species name
+         r_paths (list[str]):
+             Paths to add to .libPaths() in R. Alternatively, consider setting the R_HOME env variable.
+
+     Returns:
+         pd.DataFrame or a function for non-tabular results
+     """
+
+     _ = get_rbase(r_paths)
+
+     # connect to the rcpr R package
+     cpr = importr("rcpr")
+
+     results = cpr.bioconductor_org_function(object_type, species)
+
+     return results
+
+
+ @report_r_exceptions
+ def get_rbase(
+     r_paths: list[str] | None = None,
+ ) -> InstalledSTPackage | InstalledPackage:
+     """Get the base R package
+
+     Args:
+         r_paths (list[str], optional): Optional additional
+             r_paths. Defaults to None.
+
+     Returns:
+         InstalledSTPackage | InstalledPackage: The base R package
+     """
+     base = importr("base")
+     if r_paths is not None:
+         base._libPaths(r_paths)
+     return base
+
+
+ @warn_if_no_rpy2
+ @report_r_exceptions
+ def pandas_to_r_dataframe(df: pd.DataFrame) -> rpy2.robjects.DataFrame:
+     """Convert a pandas dataframe to an R dataframe
+
+     This uses the rpy2-arrow functionality to make the conversion
+     orders of magnitude faster.
+
+     Args:
+         df (pd.DataFrame): Pandas dataframe
+
+     Returns:
+         rpy2.robjects.DataFrame: R dataframe
+     """
+     conv = _get_py2rpy_pandas_conv()
+     with (ro.default_converter + conv).context():
+         r_df = ro.conversion.get_conversion().py2rpy(df)
+     return r_df
+
+
+ @warn_if_no_rpy2
+ @report_r_exceptions
+ def r_dataframe_to_pandas(rdf: rpy2.robjects.DataFrame) -> pd.DataFrame:
+     """Convert an R dataframe to a pandas dataframe
+
+     Args:
+         rdf (rpy2.robjects.DataFrame): R dataframe
+
+     Returns:
+         pd.DataFrame: Pandas dataframe
+     """
+     with (ro.default_converter + pandas2ri.converter).context():
+         df = ro.conversion.get_conversion().rpy2py(rdf)
+     return df
+
+
+ @warn_if_no_rpy2
+ @report_r_exceptions
+ def _get_py2rpy_pandas_conv():
+     """Get the py2rpy arrow converter for pandas
+
+     This is a high-performance converter using the rpy2-arrow functionality:
+     https://rpy2.github.io/rpy2-arrow/version/main/html/index.html
+
+     Returns:
+         Callable: The converter function
+     """
+     base = get_rbase()
+     # We use the converter included in rpy2-arrow as a template.
+     conv = rpy2.robjects.conversion.Converter(
+         "Pandas to data.frame", template=pyra.converter
+     )
+
+     @conv.py2rpy.register(pd.DataFrame)
+     def py2rpy_pandas(dataf):
+         pa_tbl = pyarrow.Table.from_pandas(dataf)
+         # pa_tbl is a pyarrow table, and this is something
+         # that the converter shipping with rpy2-arrow knows
+         # how to handle.
+         return base.as_data_frame(pa_tbl)
+
+     return conv
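A round trip through these converters might look like the following sketch, assuming a working R installation with the arrow R package so that rpy2_arrow imports cleanly:

    import pandas as pd
    from napistu.rpy2 import callr

    df = pd.DataFrame({"gene": ["TP53", "BRCA1"], "score": [0.9, 0.1]})
    r_df = callr.pandas_to_r_dataframe(df)   # pandas -> R via rpy2-arrow
    df2 = callr.r_dataframe_to_pandas(r_df)  # R -> pandas via pandas2ri
    print(df2.equals(df))  # the round trip should preserve the table contents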
napistu/rpy2/constants.py ADDED
@@ -0,0 +1,101 @@
+ """Module for Rpy2 module-specific constants"""
+
+ # Contextualization
+ # Proteinatlas
+ from __future__ import annotations
+
+ from types import SimpleNamespace
+
+ from napistu.constants import ONTOLOGIES
+ from napistu.constants import MINI_SBO_FROM_NAME
+
+ # available ontologies for mapping via bioconductor "org" packages as part of rpy2.rids:
+ # ontologies which are valid to map to and/or from when adding annotations to an SBML_dfs model.
+ BIOC_VALID_EXPANDED_SPECIES_ONTOLOGIES = {
+     ONTOLOGIES.ENSEMBL_GENE,
+     ONTOLOGIES.ENSEMBL_TRANSCRIPT,
+     ONTOLOGIES.ENSEMBL_PROTEIN,
+     ONTOLOGIES.NCBI_ENTREZ_GENE,
+     ONTOLOGIES.UNIPROT,
+     ONTOLOGIES.GENE_NAME,
+     ONTOLOGIES.SYMBOL,
+ }
+
+ # bioc ontologies used for linking systematic identifiers
+ # (entrez is not part of this list because it forms the gene index)
+ BIOC_DOGMATIC_MAPPING_ONTOLOGIES = {
+     ONTOLOGIES.ENSEMBL_GENE,
+     ONTOLOGIES.ENSEMBL_TRANSCRIPT,
+     ONTOLOGIES.ENSEMBL_PROTEIN,
+     ONTOLOGIES.UNIPROT,
+     ONTOLOGIES.GENE_NAME,
+     ONTOLOGIES.SYMBOL,
+ }
+ BIOC_PROTEIN_ONTOLOGIES = [ONTOLOGIES.UNIPROT, ONTOLOGIES.ENSEMBL_PROTEIN]
+ BIOC_GENE_ONTOLOGIES = [
+     ONTOLOGIES.NCBI_ENTREZ_GENE,
+     ONTOLOGIES.ENSEMBL_GENE,
+     ONTOLOGIES.ENSEMBL_TRANSCRIPT,
+ ]
+ BIOC_NAME_ONTOLOGIES = {
+     ONTOLOGIES.GENE_NAME: 0,
+     ONTOLOGIES.SYMBOL: 1,
+     ONTOLOGIES.UNIPROT: 2,
+     ONTOLOGIES.ENSEMBL_PROTEIN: 3,
+ }
+
+ # prefixes for bioconductor mapping tables
+ BIOC_NOMENCLATURE = SimpleNamespace(
+     CHR_TBL="CHR",
+     ENSG_TBL="ENSEMBL",
+     ENST_TBL="ENSEMBLTRANS",
+     ENSP_TBL="ENSEMBLPROT",
+     UNIPROT_TBL="UNIPROT",
+     NAME_TBL="GENENAME",
+     SYMBOL_TBL="SYMBOL",
+     CHROMOSOME="chromosome",
+     NCBI_ENTREZ_GENE="gene_id",
+     ENSEMBL_GENE="ensembl_id",
+     ENSEMBL_TRANSCRIPT="trans_id",
+     ENSEMBL_PROTEIN="prot_id",
+     UNIPROT="uniprot_id",
+     GENE_NAME="gene_name",
+     SYMBOL="symbol",
+ )
+
+ # netcontextr constants
+
+ COL_GENE = "gene"
+ COL_PROTEIN_1 = "protein1"
+ COL_PROTEIN_2 = "protein2"
+
+ FIELD_INTERACTIONS = "interactions"
+ FIELD_GENES = "genes"
+ FIELD_REACTIONS = "reactions"
+
+ # Netcontextr reactions
+ COL_ROLE = "role"
+ COL_REACTION_ID = "reaction_id"
+ COL_STOICHIOMETRY = "stoi"
+
+ SBO_TERM_MAP = {
+     "reactant": "substrate",
+     "product": "product",
+     "catalyst": "catalyst",
+     "interactor": "interactor",
+     "stimulator": "activator",
+     "inhibitor": "inhibitor",
+ }
+
+ NETCONTEXTR_ONTOLOGY = "ensembl_gene"
+
+
+ def _map_sbo_identifiers() -> dict[str, str]:
+     """Map SBO identifiers to netcontextr identifiers"""
+
+     sbo_map = {MINI_SBO_FROM_NAME[k]: v for k, v in SBO_TERM_MAP.items()}
+
+     return sbo_map
+
+
+ NETCONTEXTR_SBO_MAP = _map_sbo_identifiers()
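Since NETCONTEXTR_SBO_MAP re-keys SBO_TERM_MAP by SBO identifier, downstream code can translate SBO-annotated roles into netcontextr roles directly. A small sketch (the exact SBO identifiers come from napistu.constants.MINI_SBO_FROM_NAME):

    from napistu.constants import MINI_SBO_FROM_NAME
    from napistu.rpy2.constants import NETCONTEXTR_SBO_MAP

    # e.g., the SBO term for "stimulator" maps to the netcontextr role "activator"
    sbo_id = MINI_SBO_FROM_NAME["stimulator"]
    print(sbo_id, "->", NETCONTEXTR_SBO_MAP[sbo_id])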