napistu 0.2.5.dev6__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. napistu/__main__.py +126 -96
  2. napistu/constants.py +35 -41
  3. napistu/context/__init__.py +10 -0
  4. napistu/context/discretize.py +462 -0
  5. napistu/context/filtering.py +387 -0
  6. napistu/gcs/__init__.py +1 -1
  7. napistu/identifiers.py +74 -15
  8. napistu/indices.py +68 -0
  9. napistu/ingestion/__init__.py +1 -1
  10. napistu/ingestion/bigg.py +47 -62
  11. napistu/ingestion/constants.py +18 -133
  12. napistu/ingestion/gtex.py +113 -0
  13. napistu/ingestion/hpa.py +147 -0
  14. napistu/ingestion/sbml.py +0 -97
  15. napistu/ingestion/string.py +2 -2
  16. napistu/matching/__init__.py +10 -0
  17. napistu/matching/constants.py +18 -0
  18. napistu/matching/interactions.py +518 -0
  19. napistu/matching/mount.py +529 -0
  20. napistu/matching/species.py +510 -0
  21. napistu/mcp/__init__.py +7 -4
  22. napistu/mcp/__main__.py +128 -72
  23. napistu/mcp/client.py +16 -25
  24. napistu/mcp/codebase.py +201 -153
  25. napistu/mcp/component_base.py +170 -0
  26. napistu/mcp/config.py +223 -0
  27. napistu/mcp/constants.py +45 -2
  28. napistu/mcp/documentation.py +253 -136
  29. napistu/mcp/documentation_utils.py +13 -48
  30. napistu/mcp/execution.py +372 -305
  31. napistu/mcp/health.py +49 -67
  32. napistu/mcp/profiles.py +10 -6
  33. napistu/mcp/server.py +161 -80
  34. napistu/mcp/tutorials.py +139 -87
  35. napistu/modify/__init__.py +1 -1
  36. napistu/modify/gaps.py +1 -1
  37. napistu/network/__init__.py +1 -1
  38. napistu/network/constants.py +101 -34
  39. napistu/network/data_handling.py +388 -0
  40. napistu/network/ig_utils.py +351 -0
  41. napistu/network/napistu_graph_core.py +354 -0
  42. napistu/network/neighborhoods.py +40 -40
  43. napistu/network/net_create.py +373 -309
  44. napistu/network/net_propagation.py +47 -19
  45. napistu/network/{net_utils.py → ng_utils.py} +124 -272
  46. napistu/network/paths.py +67 -51
  47. napistu/network/precompute.py +11 -11
  48. napistu/ontologies/__init__.py +10 -0
  49. napistu/ontologies/constants.py +129 -0
  50. napistu/ontologies/dogma.py +243 -0
  51. napistu/ontologies/genodexito.py +649 -0
  52. napistu/ontologies/mygene.py +369 -0
  53. napistu/ontologies/renaming.py +198 -0
  54. napistu/rpy2/__init__.py +229 -86
  55. napistu/rpy2/callr.py +47 -77
  56. napistu/rpy2/constants.py +24 -23
  57. napistu/rpy2/rids.py +61 -648
  58. napistu/sbml_dfs_core.py +587 -222
  59. napistu/scverse/__init__.py +15 -0
  60. napistu/scverse/constants.py +28 -0
  61. napistu/scverse/loading.py +727 -0
  62. napistu/utils.py +118 -10
  63. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
  64. napistu-0.3.1.dist-info/RECORD +133 -0
  65. tests/conftest.py +22 -0
  66. tests/test_context_discretize.py +56 -0
  67. tests/test_context_filtering.py +267 -0
  68. tests/test_identifiers.py +100 -0
  69. tests/test_indices.py +65 -0
  70. tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
  71. tests/test_matching_interactions.py +108 -0
  72. tests/test_matching_mount.py +305 -0
  73. tests/test_matching_species.py +394 -0
  74. tests/test_mcp_config.py +193 -0
  75. tests/test_mcp_documentation_utils.py +12 -3
  76. tests/test_mcp_server.py +356 -0
  77. tests/test_network_data_handling.py +397 -0
  78. tests/test_network_ig_utils.py +23 -0
  79. tests/test_network_neighborhoods.py +19 -0
  80. tests/test_network_net_create.py +459 -0
  81. tests/test_network_ng_utils.py +30 -0
  82. tests/test_network_paths.py +56 -0
  83. tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
  84. tests/test_ontologies_genodexito.py +58 -0
  85. tests/test_ontologies_mygene.py +39 -0
  86. tests/test_ontologies_renaming.py +110 -0
  87. tests/test_rpy2_callr.py +79 -0
  88. tests/test_rpy2_init.py +151 -0
  89. tests/test_sbml.py +0 -31
  90. tests/test_sbml_dfs_core.py +134 -10
  91. tests/test_scverse_loading.py +778 -0
  92. tests/test_set_coverage.py +2 -2
  93. tests/test_utils.py +121 -1
  94. napistu/mechanism_matching.py +0 -1353
  95. napistu/rpy2/netcontextr.py +0 -467
  96. napistu-0.2.5.dev6.dist-info/RECORD +0 -97
  97. tests/test_igraph.py +0 -367
  98. tests/test_mechanism_matching.py +0 -784
  99. tests/test_net_utils.py +0 -149
  100. tests/test_netcontextr.py +0 -105
  101. tests/test_rpy2.py +0 -61
  102. /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
  103. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
  104. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
  105. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
  106. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
  107. /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
napistu/matching/species.py ADDED
@@ -0,0 +1,510 @@
+from __future__ import annotations
+
+import logging
+from typing import Optional, Union, Set, Dict, List
+
+import numpy as np
+import pandas as pd
+
+from napistu import identifiers
+from napistu.constants import ONTOLOGIES_LIST, SBML_DFS, IDENTIFIERS
+from napistu.matching.constants import FEATURE_ID_VAR_DEFAULT
+
+logger = logging.getLogger(__name__)
+
+
+def features_to_pathway_species(
+    feature_identifiers: pd.DataFrame,
+    species_identifiers: pd.DataFrame,
+    ontologies: set,
+    feature_identifiers_var: str = IDENTIFIERS.IDENTIFIER,
+    feature_id_var: str = FEATURE_ID_VAR_DEFAULT,
+    expand_identifiers: bool = False,
+    identifier_delimiter: str = "/",
+    verbose: bool = False,
+) -> pd.DataFrame:
+    """
+    Features to Pathway Species
+
+    Match a table of molecular species to their corresponding species in a pathway representation.
+
+    Parameters:
+    feature_identifiers: pd.DataFrame
+        pd.DataFrame containing a "feature_identifiers_var" variable used to match entries
+    species_identifiers: pd.DataFrame
+        A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species"),
+        generally using sbml_dfs_core.export_sbml_dfs()
+    ontologies: set
+        A set of ontologies used to match features to pathway species
+    feature_identifiers_var: str
+        Variable in "feature_identifiers" containing identifiers
+    feature_id_var: str
+        Variable in "feature_identifiers" containing feature ids; created if absent
+    expand_identifiers: bool, default=False
+        If True, split identifiers in feature_identifiers_var by identifier_delimiter and explode into multiple rows
+    identifier_delimiter: str, default="/"
+        Delimiter to use for splitting identifiers if expand_identifiers is True
+    verbose: bool, default=False
+        If True, log mapping statistics at the end of the function
+
+    Returns:
+    pathway_species: pd.DataFrame
+        species_identifiers joined to feature_identifiers based on shared identifiers,
+        or None if no identifiers matched
+    """
+
+    # Check for identifier column
+    if feature_identifiers_var not in feature_identifiers.columns.to_list():
+        raise ValueError(
+            f"{feature_identifiers_var} must be a variable in 'feature_identifiers', "
+            f"possible variables are {', '.join(feature_identifiers.columns.tolist())}"
+        )
+
+    # Respect or create feature_id column
+    feature_identifiers = _ensure_feature_id_var(feature_identifiers, feature_id_var)
+
+    # Optionally expand identifiers into multiple rows
+    if expand_identifiers:
+        # Count the number of expansions by counting delimiters
+        n_expansions = (
+            feature_identifiers[feature_identifiers_var]
+            .astype(str)
+            .str.count(identifier_delimiter)
+            .sum()
+        )
+        if n_expansions > 0:
+            logger.info(
+                f"Expanding identifiers: {n_expansions} delimiters found in '{feature_identifiers_var}', will expand to more rows."
+            )
+
+        # Split, strip whitespace, and explode
+        feature_identifiers = feature_identifiers.copy()
+        feature_identifiers[feature_identifiers_var] = (
+            feature_identifiers[feature_identifiers_var]
+            .astype(str)
+            .str.split(identifier_delimiter)
+            .apply(lambda lst: [x.strip() for x in lst])
+        )
+        feature_identifiers = feature_identifiers.explode(
+            feature_identifiers_var, ignore_index=True
+        )
+
+    # check identifiers table
+    identifiers._check_species_identifiers_table(species_identifiers)
+
+    available_ontologies = set(species_identifiers[IDENTIFIERS.ONTOLOGY].tolist())
+    unavailable_ontologies = ontologies.difference(available_ontologies)
+
+    # no ontologies present
+    if len(unavailable_ontologies) == len(ontologies):
+        raise ValueError(
+            f"None of the requested ontologies ({', '.join(ontologies)}) "
+            "were used to annotate pathway species. Available ontologies are: "
+            f"{', '.join(available_ontologies)}"
+        )
+
+    # 1+ desired ontologies are not present
+    if len(unavailable_ontologies) > 0:
+        raise ValueError(
+            f"Some of the requested ontologies ({', '.join(unavailable_ontologies)}) "
+            "were NOT used to annotate pathway species. Available ontologies are: "
+            f"{', '.join(available_ontologies)}"
+        )
+
+    relevant_identifiers = species_identifiers[
+        species_identifiers[IDENTIFIERS.ONTOLOGY].isin(ontologies)
+    ]
+
+    # map features to pathway species
+    pathway_species = feature_identifiers.merge(
+        relevant_identifiers,
+        left_on=feature_identifiers_var,
+        right_on=IDENTIFIERS.IDENTIFIER,
+    )
+
+    if pathway_species.shape[0] == 0:
+        logger.warning(
+            "None of the provided species identifiers matched entries of the pathway; returning None"
+        )
+        return None
+
+    # report the fraction of unmapped species
+    if verbose:
+        _log_feature_species_mapping_stats(pathway_species, feature_id_var)
+
+    return pathway_species
+
+
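For orientation, a minimal usage sketch (an editorial illustration, not part of the diff): it assumes a species_identifiers table has already been exported, e.g. via sbml_dfs.get_identifiers("species"), and that "uniprot" is a valid ontology label in that table.

    import pandas as pd
    from napistu.matching.species import features_to_pathway_species

    # two measured proteins keyed by UniProt accession (hypothetical data)
    features = pd.DataFrame(
        {"identifier": ["P12345", "Q67890"], "log2fc": [1.2, -0.4]}
    )

    matched = features_to_pathway_species(
        feature_identifiers=features,
        species_identifiers=species_identifiers,  # assumed loaded elsewhere
        ontologies={"uniprot"},
        verbose=True,  # logs mapping statistics
    )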
+def match_features_to_wide_pathway_species(
+    wide_df: pd.DataFrame,
+    species_identifiers: pd.DataFrame,
+    ontologies: Optional[Union[Set[str], Dict[str, str]]] = None,
+    feature_identifiers_var: str = IDENTIFIERS.IDENTIFIER,
+    feature_id_var: str = FEATURE_ID_VAR_DEFAULT,
+    verbose: bool = False,
+) -> pd.DataFrame:
+    """
+    Convert a wide-format DataFrame with multiple ontology columns to long format,
+    and match features to pathway species by ontology and identifier.
+
+    Parameters
+    ----------
+    wide_df : pd.DataFrame
+        DataFrame with ontology identifier columns and any number of results columns.
+        All non-ontology columns are treated as results.
+    species_identifiers : pd.DataFrame
+        DataFrame as required by features_to_pathway_species
+    ontologies : Optional[Union[Set[str], Dict[str, str]]], default=None
+        Either:
+        - Set of columns to treat as ontologies (these should be entries in ONTOLOGIES_LIST)
+        - Dict mapping wide column names to ontology names in the ONTOLOGIES_LIST controlled vocabulary
+        - None to automatically detect valid ontology columns based on ONTOLOGIES_LIST
+    feature_identifiers_var : str, default="identifier"
+        Name for the identifier column in the long format
+    feature_id_var : str, default=FEATURE_ID_VAR_DEFAULT
+        Name for the feature id column in the long format
+    verbose : bool, default=False
+        Whether to print verbose output
+
+    Returns
+    -------
+    pd.DataFrame
+        Output of match_by_ontology_and_identifier
+
+    Examples
+    --------
+    >>> # Example with auto-detected ontology columns and multiple results
+    >>> wide_df = pd.DataFrame({
+    ...     'uniprot': ['P12345', 'Q67890'],
+    ...     'chebi': ['15377', '16810'],
+    ...     'log2fc': [1.0, 2.0],
+    ...     'pvalue': [0.01, 0.05]
+    ... })
+    >>> result = match_features_to_wide_pathway_species(
+    ...     wide_df=wide_df,
+    ...     species_identifiers=species_identifiers
+    ... )
+
+    >>> # Example with custom ontology mapping
+    >>> wide_df = pd.DataFrame({
+    ...     'protein_id': ['P12345', 'Q67890'],
+    ...     'compound_id': ['15377', '16810'],
+    ...     'expression': [1.0, 2.0],
+    ...     'confidence': [0.8, 0.9]
+    ... })
+    >>> result = match_features_to_wide_pathway_species(
+    ...     wide_df=wide_df,
+    ...     species_identifiers=species_identifiers,
+    ...     ontologies={'protein_id': 'uniprot', 'compound_id': 'chebi'}
+    ... )
+    """
+    # Make a copy to avoid modifying the input
+    wide_df = wide_df.copy()
+
+    # Validate ontologies and get the set of ontology columns
+    ontology_cols = _validate_wide_ontologies(wide_df, ontologies)
+    melt_cols = list(ontology_cols)
+
+    # Apply renaming if a mapping is provided
+    if isinstance(ontologies, dict):
+        wide_df = wide_df.rename(columns=ontologies)
+
+    # Ensure feature_id column exists
+    wide_df = _ensure_feature_id_var(wide_df, feature_id_var)
+
+    # All non-ontology columns are treated as results
+    results_cols = list(set(wide_df.columns) - set(melt_cols))
+    if not results_cols:
+        raise ValueError("No results columns found in DataFrame")
+
+    logger.info(f"Using columns as results: {results_cols}")
+
+    # Melt ontology columns to long format, keeping all results columns
+    long_df = wide_df.melt(
+        id_vars=results_cols,
+        value_vars=melt_cols,
+        var_name=IDENTIFIERS.ONTOLOGY,
+        value_name=feature_identifiers_var,
+    ).dropna(subset=[feature_identifiers_var])
+
+    logger.debug(f"Final long format shape: {long_df.shape}")
+
+    # Call the matching function with the validated ontologies
+    out = match_by_ontology_and_identifier(
+        feature_identifiers=long_df,
+        species_identifiers=species_identifiers,
+        ontologies=ontology_cols,
+        feature_identifiers_var=feature_identifiers_var,
+    )
+
+    if verbose:
+        _log_feature_species_mapping_stats(out, feature_id_var)
+
+    return out
+
+
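For intuition about the melt step above, an editorial sketch (not part of the diff; it assumes IDENTIFIERS.ONTOLOGY == "ontology" and uses the documented default feature_identifiers_var == "identifier"):

    import pandas as pd

    wide_df = pd.DataFrame(
        {"uniprot": ["P12345"], "chebi": ["15377"], "log2fc": [1.0], "feature_id": [0]}
    )
    # each ontology column becomes rows in a long (ontology, identifier) table,
    # while the results columns are carried along unchanged
    long_df = wide_df.melt(
        id_vars=["log2fc", "feature_id"],
        value_vars=["uniprot", "chebi"],
        var_name="ontology",
        value_name="identifier",
    )
    #    log2fc  feature_id ontology identifier
    # 0     1.0           0  uniprot     P12345
    # 1     1.0           0    chebi      15377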
+def match_by_ontology_and_identifier(
+    feature_identifiers: pd.DataFrame,
+    species_identifiers: pd.DataFrame,
+    ontologies: Union[str, Set[str], List[str]],
+    feature_identifiers_var: str = IDENTIFIERS.IDENTIFIER,
+    verbose: bool = False,
+) -> pd.DataFrame:
+    """
+    Match features to pathway species based on both ontology and identifier matches.
+    Performs separate matching for each ontology and concatenates the results.
+
+    Parameters
+    ----------
+    feature_identifiers : pd.DataFrame
+        DataFrame containing feature identifiers and results.
+        Must have columns [ontology, feature_identifiers_var, results]
+    species_identifiers : pd.DataFrame
+        DataFrame containing species identifiers from pathway.
+        Must have columns [ontology, identifier]
+    ontologies : Union[str, Set[str], List[str]]
+        Ontologies to match on. Can be:
+        - A single ontology string
+        - A set of ontology strings
+        - A list of ontology strings
+    feature_identifiers_var : str, default="identifier"
+        Name of the identifier column in feature_identifiers
+    verbose : bool, default=False
+        Whether to print verbose output
+
+    Returns
+    -------
+    pd.DataFrame
+        Concatenated results of matching for each ontology.
+        Contains all columns from features_to_pathway_species()
+
+    Examples
+    --------
+    >>> # Match using a single ontology
+    >>> result = match_by_ontology_and_identifier(
+    ...     feature_identifiers=features_df,
+    ...     species_identifiers=species_df,
+    ...     ontologies="uniprot"
+    ... )
+
+    >>> # Match using multiple ontologies
+    >>> result = match_by_ontology_and_identifier(
+    ...     feature_identifiers=features_df,
+    ...     species_identifiers=species_df,
+    ...     ontologies={"uniprot", "chebi"}
+    ... )
+    """
+    # Convert string to set for consistent handling
+    if isinstance(ontologies, str):
+        ontologies = {ontologies}
+    elif isinstance(ontologies, list):
+        ontologies = set(ontologies)
+
+    # Validate ontologies
+    invalid_onts = ontologies - set(ONTOLOGIES_LIST)
+    if invalid_onts:
+        raise ValueError(
+            f"Invalid ontologies specified: {invalid_onts}. Must be one of: {ONTOLOGIES_LIST}"
+        )
+
+    # Initialize list to store results
+    matched_dfs = []
+
+    # Process each ontology separately
+    for ont in ontologies:
+        # Filter feature identifiers to current ontology and drop ontology column
+        ont_features = (
+            feature_identifiers[feature_identifiers[IDENTIFIERS.ONTOLOGY] == ont]
+            .drop(columns=[IDENTIFIERS.ONTOLOGY])
+            .copy()
+        )
+
+        if ont_features.empty:
+            logger.warning(f"No features found for ontology: {ont}")
+            continue
+
+        # Filter species identifiers to current ontology
+        ont_species = species_identifiers[
+            species_identifiers[IDENTIFIERS.ONTOLOGY] == ont
+        ].copy()
+
+        if ont_species.empty:
+            logger.warning(f"No species found for ontology: {ont}")
+            continue
+
+        logger.debug(
+            f"Matching {len(ont_features)} features to {len(ont_species)} species for ontology {ont}"
+        )
+
+        # Match features to species for this ontology
+        matched = features_to_pathway_species(
+            feature_identifiers=ont_features,
+            species_identifiers=ont_species,
+            ontologies={ont},
+            feature_identifiers_var=feature_identifiers_var,
+            verbose=verbose,
+        )
+
+        # features_to_pathway_species returns None when nothing matches
+        if matched is None or matched.empty:
+            logger.warning(f"No matches found for ontology: {ont}")
+            continue
+
+        matched_dfs.append(matched)
+
+    if not matched_dfs:
+        logger.warning("No matches found for any ontology")
+        return pd.DataFrame()  # Return an empty DataFrame
+
+    # Combine results from all ontologies
+    result = pd.concat(matched_dfs, axis=0, ignore_index=True)
+
+    logger.info(
+        f"Found {len(result)} total matches across {len(matched_dfs)} ontologies"
+    )
+
+    return result
+
+
+def _validate_wide_ontologies(
+    wide_df: pd.DataFrame,
+    ontologies: Optional[Union[str, Set[str], Dict[str, str]]] = None,
+) -> Set[str]:
+    """
+    Validate ontology specifications against the wide DataFrame and ONTOLOGIES_LIST.
+
+    Parameters
+    ----------
+    wide_df : pd.DataFrame
+        DataFrame with one column per ontology and a results column
+    ontologies : Optional[Union[str, Set[str], Dict[str, str]]]
+        Either:
+        - String specifying a single ontology column
+        - Set of columns to treat as ontologies
+        - Dict mapping wide column names to ontology names
+        - None to automatically detect ontology columns based on ONTOLOGIES_LIST
+
+    Returns
+    -------
+    Set[str]
+        Set of validated ontology names. For dictionary mappings, returns the target ontology names.
+
+    Raises
+    ------
+    ValueError
+        If validation fails for any ontology specification or no valid ontologies are found
+    """
+    # Convert string input to set
+    if isinstance(ontologies, str):
+        ontologies = {ontologies}
+
+    # Get the set of ontology columns
+    if isinstance(ontologies, dict):
+        # Check source columns exist in DataFrame
+        missing_cols = set(ontologies.keys()) - set(wide_df.columns)
+        if missing_cols:
+            raise ValueError(f"Source columns not found in DataFrame: {missing_cols}")
+        # Validate target ontologies against ONTOLOGIES_LIST
+        invalid_onts = set(ontologies.values()) - set(ONTOLOGIES_LIST)
+        if invalid_onts:
+            raise ValueError(
+                f"Invalid ontologies in mapping: {invalid_onts}. Must be one of: {ONTOLOGIES_LIST}"
+            )
+        # Return target ontology names instead of source column names
+        ontology_cols = set(ontologies.values())
+
+    elif isinstance(ontologies, set):
+        # Check specified columns exist in DataFrame
+        missing_cols = ontologies - set(wide_df.columns)
+        if missing_cols:
+            raise ValueError(
+                f"Specified ontology columns not found in DataFrame: {missing_cols}"
+            )
+        # Validate specified ontologies against ONTOLOGIES_LIST
+        invalid_onts = ontologies - set(ONTOLOGIES_LIST)
+        if invalid_onts:
+            raise ValueError(
+                f"Invalid ontologies in set: {invalid_onts}. Must be one of: {ONTOLOGIES_LIST}"
+            )
+        ontology_cols = ontologies
+
+    else:
+        # Auto-detect ontology columns by matching against ONTOLOGIES_LIST
+        ontology_cols = set(wide_df.columns) & set(ONTOLOGIES_LIST)
+        if not ontology_cols:
+            raise ValueError(
+                f"No valid ontology columns found in DataFrame. Column names must match one of: {ONTOLOGIES_LIST}"
+            )
+        logger.info(f"Auto-detected ontology columns: {ontology_cols}")
+
+    logger.debug(f"Validated ontology columns: {ontology_cols}")
+    return ontology_cols
+
+
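The three accepted forms of the ontologies argument, sketched for illustration (not part of the diff; assumes "uniprot" and "chebi" are entries in ONTOLOGIES_LIST while "compound_id" and "score" are not):

    import pandas as pd

    df = pd.DataFrame({"uniprot": ["P12345"], "compound_id": ["15377"], "score": [0.9]})

    _validate_wide_ontologies(df)                            # auto-detect -> {"uniprot"}
    _validate_wide_ontologies(df, {"uniprot"})               # explicit column set
    _validate_wide_ontologies(df, {"compound_id": "chebi"})  # rename mapping -> {"chebi"}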
+def _log_feature_species_mapping_stats(
+    pathway_species: pd.DataFrame, feature_id_var: str = FEATURE_ID_VAR_DEFAULT
+):
+    """
+    Log statistics about the mapping between feature_id and s_id in the pathway_species DataFrame.
+    """
+
+    # Percent change in feature_ids
+    n_feature_ids = pathway_species[feature_id_var].nunique()
+    n_input_feature_ids = (
+        pathway_species[feature_id_var].max() + 1
+        if feature_id_var in pathway_species.columns
+        else 0
+    )
+    percent_change = (
+        100 * (n_feature_ids - n_input_feature_ids) / n_input_feature_ids
+        if n_input_feature_ids
+        else 0
+    )
+    logger.info(
+        f"{percent_change:+.1f}% change in feature_ids ({n_feature_ids} vs {n_input_feature_ids})"
+    )
+
+    # Number of times an s_id maps to 1+ feature_ids (with s_name)
+    s_id_counts = pathway_species.groupby(SBML_DFS.S_ID)[feature_id_var].nunique()
+    s_id_multi = s_id_counts[s_id_counts > 1]
+    logger.info(f"{len(s_id_multi)} s_id(s) map to more than one feature_id.")
+    if not s_id_multi.empty:
+        examples = pathway_species[
+            pathway_species[SBML_DFS.S_ID].isin(s_id_multi.index)
+        ][[SBML_DFS.S_ID, SBML_DFS.S_NAME, feature_id_var]]
+        logger.info(
+            "Examples of s_id mapping to multiple feature_ids (showing up to 3):\n"
+            f"{examples.groupby([SBML_DFS.S_ID, SBML_DFS.S_NAME])[feature_id_var].apply(list).head(3)}"
+        )
+
+    # Number of times a feature_id maps to 1+ s_ids (with s_name)
+    feature_id_counts = pathway_species.groupby(feature_id_var)[SBML_DFS.S_ID].nunique()
+    feature_id_multi = feature_id_counts[feature_id_counts > 1]
+    logger.info(f"{len(feature_id_multi)} feature_id(s) map to more than one s_id.")
+    if not feature_id_multi.empty:
+        examples = pathway_species[
+            pathway_species[feature_id_var].isin(feature_id_multi.index)
+        ][[feature_id_var, SBML_DFS.S_ID, SBML_DFS.S_NAME]]
+        logger.info(
+            "Examples of feature_id mapping to multiple s_ids (showing up to 3):\n"
+            f"{examples.groupby([feature_id_var])[[SBML_DFS.S_ID, SBML_DFS.S_NAME]].apply(lambda df: list(df.itertuples(index=False, name=None))).head(3)}"
+        )
+
+
+def _ensure_feature_id_var(
+    df: pd.DataFrame, feature_id_var: str = FEATURE_ID_VAR_DEFAULT
+) -> pd.DataFrame:
+    """
+    Ensure the DataFrame has a feature_id column, creating one if it doesn't exist.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame to check/modify
+    feature_id_var : str, default=FEATURE_ID_VAR_DEFAULT
+        Name of the feature ID column
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with guaranteed feature_id column
+    """
+    if feature_id_var not in df.columns:
+        logger.warning(f"No {feature_id_var} column found in DataFrame, creating one")
+        df = df.copy()
+        df[feature_id_var] = np.arange(len(df))
+    return df
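One subtlety, sketched editorially (not part of the diff): _log_feature_species_mapping_stats estimates the input feature count as max(feature_id) + 1, which only works because _ensure_feature_id_var assigns sequential 0-based ids (this sketch assumes FEATURE_ID_VAR_DEFAULT == "feature_id"):

    import pandas as pd

    df = pd.DataFrame({"identifier": ["P12345", "Q67890", "O11111"]})
    df = _ensure_feature_id_var(df)       # adds feature_id = [0, 1, 2]
    n_input = df["feature_id"].max() + 1  # recovers the input row count, 3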
napistu/mcp/__init__.py CHANGED
@@ -18,15 +18,18 @@ except ImportError:
     is_available = False
 
 if is_available:
-    from .server import create_server
-    from .profiles import get_profile
+    from napistu.mcp.server import create_server
+    from napistu.mcp.profiles import get_profile
+    from napistu.mcp.constants import MCP_PROFILES
 
-    def start_server(profile_name: str = "local", **kwargs) -> Dict[str, Any]:
+    def start_server(
+        profile_name: str = MCP_PROFILES.EXECUTION, **kwargs
+    ) -> Dict[str, Any]:
         """
         Start an MCP server with a specific profile.
 
         Args:
-            profile_name: Name of the profile ('local', 'remote', or 'full')
+            profile_name: Name of the profile ('execution', 'docs', or 'full')
             **kwargs: Additional configuration options
 
         Returns:
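A hedged sketch of the updated entry point (editorial, not part of the diff; the string value behind MCP_PROFILES.EXECUTION and any extra kwargs are assumptions beyond what the hunk shows):

    from napistu import mcp

    if mcp.is_available:
        # the default profile changed from "local" to MCP_PROFILES.EXECUTION;
        # "docs" and "full" remain selectable by name
        server_info = mcp.start_server(profile_name="full")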