napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
@@ -0,0 +1,147 @@
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+ from napistu import identifiers
5
+ from napistu import sbml_dfs_core
6
+ from napistu import source
7
+ from napistu import utils
8
+ from napistu.constants import BQB
9
+ from napistu.constants import MINI_SBO_FROM_NAME
10
+ from napistu.ingestion.constants import YEAST_IDEA_KINETICS_URL
11
+ from napistu.ingestion.constants import YEAST_IDEA_PUBMED_ID
12
+ from napistu.ingestion.constants import YEAST_IDEA_SOURCE
13
+ from napistu.ingestion.constants import YEAST_IDEA_TARGET
14
+
15
+
16
def download_idea(output_dir: str) -> None:
    """
    Download IDEA Kinetics

    Delegates to utils.download_and_extract using YEAST_IDEA_KINETICS_URL.

    Args:
        output_dir: Passed to utils.download_and_extract as its second argument;
            presumably the local directory the extracted data is saved to.

    Returns:
        None
    """

    # TODO: since only a single file is outputted, it makes sense to download and extract the data, then copy it to a target URI
    # TODO: add GCS support
    utils.download_and_extract(YEAST_IDEA_KINETICS_URL, output_dir)
    return None
23
+
24
+
25
def convert_idea_kinetics_to_sbml_dfs(
    idea_path: str,
) -> sbml_dfs_core.SBML_dfs:
    """
    Convert IDEA Kinetics to SBML DFs

    Read the IDEA kinetics table and represent every regulator->target pair as
    one directed, irreversible interaction.

    Args:
        idea_path: Path to the tab-separated IDEA Kinetics file.

    Returns:
        SBML_dfs: the object returned by sbml_dfs_core.sbml_dfs_from_edgelist,
            after its validate() method has been called. The edgelist is passed
            with keep_reactions_data="idea" so the extra kinetic columns
            (e.g., directness) are kept as reactions_data.

    """

    # TODO: replace with GCS support (currently this just reads a local .tsv)
    kinetics = pd.read_csv(idea_path, sep="\t")

    # label each record as a probably-direct or probably-indirect change:
    # a t_rise below the cutoff counts as direct, everything else as indirect
    rise_cutoff = 15
    kinetics["directness"] = [
        "direct" if t_rise < rise_cutoff else "indirect"
        for t_rise in kinetics["t_rise"]
    ]

    # collapse repeated records of a TF-target pair into one summarized row
    pair_keys = [YEAST_IDEA_SOURCE, YEAST_IDEA_TARGET]
    grouped_pairs = kinetics.groupby(pair_keys, as_index=True)
    pair_summaries = grouped_pairs.apply(_summarize_idea_pairs).reset_index()

    # rename the endpoints and add the constant edgelist fields
    endpoint_names = {
        YEAST_IDEA_SOURCE: "upstream_name",
        YEAST_IDEA_TARGET: "downstream_name",
    }
    edges = pair_summaries.rename(columns=endpoint_names).assign(
        upstream_compartment="cellular_component",
        downstream_compartment="cellular_component",
        # tag reactions with the IDEA publication
        r_Identifiers=identifiers._format_Identifiers_pubmed(YEAST_IDEA_PUBMED_ID),
        r_isreversible=False,
    )

    # build readable interaction names while "role" still holds the readable label
    name_parts = zip(
        edges["upstream_name"],
        edges["directness"],
        edges["role"],
        edges["downstream_name"],
    )
    edges["r_name"] = [f"{u} {d} {r} of {t}" for u, d, r, t in name_parts]

    # swap the readable roles for SBO ontology entries and rename the column to match
    interaction_edgelist = edges.replace({"role": MINI_SBO_FROM_NAME}).rename(
        columns={"role": "sbo_term"}
    )

    # every distinct regulator or target becomes a species
    species_names = list(
        {
            *kinetics[YEAST_IDEA_SOURCE],
            *kinetics[YEAST_IDEA_TARGET],
        }
    )
    species_df = pd.DataFrame({"s_name": species_names})

    # each species is identified by its gene name
    species_df["s_Identifiers"] = [
        identifiers.Identifiers(
            [{"ontology": "gene_name", "identifier": gene_name, "bqb": BQB.IS}]
        )
        for gene_name in species_df["s_name"]
    ]

    sbml_dfs = sbml_dfs_core.sbml_dfs_from_edgelist(
        interaction_edgelist=interaction_edgelist,
        species_df=species_df,
        # just treat this as uncompartmentalized for now
        compartments_df=sbml_dfs_core._stub_compartments(),
        # per convention unaggregated models receive an empty source
        interaction_source=source.Source(init=True),
        # additional attributes (directness) are added to reactions_data
        keep_reactions_data="idea",
    )
    sbml_dfs.validate()

    return sbml_dfs
124
+
125
+
126
def _summarize_idea_pairs(pairs_data: pd.DataFrame) -> pd.Series:
    """
    Rollup multiple records of a TF->target pair into a single summary.

    Args:
        pairs_data: All records for one TF-target pair. Must contain the columns
            "v_inter", "v_final", "t_rise", "t_fall", "rate" and "directness".

    Returns:
        pd.Series: the six kinetic attributes above taken from the record with
            the smallest "t_rise", plus a "role" entry derived from the signs of
            "v_inter" across all records:
            - "modifier" if both positive and negative values are present
            - "stimulator" if every value is positive
            - "inhibitor" if every value is negative

    Raises:
        ValueError: if no role can be assigned, i.e. the values do not contain
            both signs and are not all strictly positive or all strictly negative
            (for example zeros or missing values).
    """

    KEYS_SUMMARIZED = ["v_inter", "v_final", "t_rise", "t_fall", "rate", "directness"]

    # pull the reported attributes from the earliest change;
    # this will favor direct over indirect naturally
    earliest_change = pairs_data.sort_values("t_rise").iloc[0].to_dict()
    kinetic_timing_dict = {k: earliest_change[k] for k in KEYS_SUMMARIZED}

    # map v_inter (log2 fold-change change following perturbation) onto SBO terms for interactions
    is_positive = pairs_data["v_inter"] > 0
    is_negative = pairs_data["v_inter"] < 0

    if is_positive.any() and is_negative.any():
        kinetic_timing_dict["role"] = "modifier"
    elif is_positive.all():
        kinetic_timing_dict["role"] = "stimulator"
    elif is_negative.all():
        kinetic_timing_dict["role"] = "inhibitor"
    else:
        # the exception must be raised, otherwise the summary silently lacks a "role"
        raise ValueError("Unexpected v_inter values")

    return pd.Series(kinetic_timing_dict)