napistu 0.3.2.dev1__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,6 @@ from napistu import sbml_dfs_core
8
8
  from napistu import source
9
9
  from napistu import utils
10
10
  from napistu.constants import BQB
11
- from napistu.constants import COMPARTMENTS
12
11
  from napistu.constants import MINI_SBO_FROM_NAME
13
12
  from napistu.ingestion import napistu_edgelist
14
13
  from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
@@ -28,6 +27,7 @@ from napistu.ingestion.constants import STRING_UPSTREAM_COMPARTMENT
28
27
  from napistu.ingestion.constants import STRING_UPSTREAM_NAME
29
28
  from napistu.ingestion.constants import STRING_URL_EXPRESSIONS
30
29
  from napistu.ingestion.constants import STRING_VERSION
30
+ from napistu.ingestion.constants import GENERIC_COMPARTMENT
31
31
  from fs import open_fs
32
32
 
33
33
  logger = logging.getLogger(__name__)
@@ -297,7 +297,7 @@ def _build_interactor_edgelist(
297
297
  downstream_col_name: str = STRING_TARGET,
298
298
  add_reverse_interactions: bool = False,
299
299
  sbo_term: str = "interactor",
300
- compartment: str = COMPARTMENTS["CELLULAR_COMPONENT"],
300
+ compartment: str = GENERIC_COMPARTMENT,
301
301
  ) -> pd.DataFrame:
302
302
  """Format STRING interactions as reactions."""
303
303
 
napistu/modify/gaps.py CHANGED
@@ -15,18 +15,18 @@ from napistu import utils
15
15
  from napistu.network import net_create
16
16
 
17
17
  from napistu.constants import SBML_DFS
18
- from napistu.constants import COMPARTMENTS
19
18
  from napistu.constants import IDENTIFIERS
20
19
  from napistu.constants import MINI_SBO_FROM_NAME
21
20
  from napistu.constants import SBOTERM_NAMES
22
21
  from napistu.constants import SOURCE_SPEC
22
+ from napistu.ingestion.constants import EXCHANGE_COMPARTMENT
23
23
 
24
24
  logger = logging.getLogger(__name__)
25
25
 
26
26
 
27
27
  def add_transportation_reactions(
28
28
  sbml_dfs: sbml_dfs_core.SBML_dfs,
29
- exchange_compartment: str = COMPARTMENTS["CYTOSOL"],
29
+ exchange_compartment: str = EXCHANGE_COMPARTMENT,
30
30
  ) -> sbml_dfs_core.SBML_dfs:
31
31
  """
32
32
  Add transportation reactions to connect all forms of a protein across compartments.
@@ -73,7 +73,7 @@ def add_transportation_reactions(
73
73
  def update_sbml_df_with_exchange(
74
74
  species_needing_transport_rxns: np.ndarray,
75
75
  sbml_dfs: sbml_dfs_core.SBML_dfs,
76
- exchange_compartment: str = COMPARTMENTS["CYTOSOL"],
76
+ exchange_compartment: str = EXCHANGE_COMPARTMENT,
77
77
  ) -> sbml_dfs_core.SBML_dfs:
78
78
  """
79
79
  Add transportation reactions between all locations of a set of molecular species by
@@ -120,7 +120,7 @@ def save_precomputed_distances(
120
120
  OSError
121
121
  If the file cannot be written to (permission issues, etc.)
122
122
  """
123
- save_json(str(uri), precomputed_distances.to_dict(orient="index"))
123
+ save_json(str(uri), precomputed_distances.to_json())
124
124
 
125
125
 
126
126
  def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
@@ -143,12 +143,18 @@ def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
143
143
  If the specified file does not exist
144
144
  """
145
145
  try:
146
- data_dict = load_json(str(uri))
146
+ json_string = load_json(str(uri))
147
+ df = pd.read_json(json_string)
148
+
149
+ # Convert integer columns to float
150
+ for col in df.columns:
151
+ if df[col].dtype in ["int64", "int32", "int16", "int8"]:
152
+ df[col] = df[col].astype(float)
153
+
154
+ return df
147
155
  except ResourceNotFound as e:
148
156
  raise FileNotFoundError(f"File not found: {uri}") from e
149
157
 
150
- return pd.DataFrame.from_dict(data_dict, orient="index").rename(index=int)
151
-
152
158
 
153
159
  def _calculate_distances_subset(
154
160
  napistu_graph: NapistuGraph,
napistu/sbml_dfs_core.py CHANGED
@@ -13,6 +13,7 @@ from napistu import identifiers
13
13
  from napistu import sbml_dfs_utils
14
14
  from napistu import source
15
15
  from napistu import utils
16
+ from napistu.ingestion import sbml
16
17
  from napistu.constants import SBML_DFS
17
18
  from napistu.constants import SBML_DFS_SCHEMA
18
19
  from napistu.constants import IDENTIFIERS
@@ -23,9 +24,6 @@ from napistu.constants import BQB_PRIORITIES
23
24
  from napistu.constants import ONTOLOGY_PRIORITIES
24
25
  from napistu.constants import BQB
25
26
  from napistu.constants import BQB_DEFINING_ATTRS
26
- from napistu.constants import COMPARTMENTS
27
- from napistu.constants import COMPARTMENT_ALIASES
28
- from napistu.constants import COMPARTMENTS_GO_TERMS
29
27
  from napistu.constants import MINI_SBO_FROM_NAME
30
28
  from napistu.constants import MINI_SBO_TO_NAME
31
29
  from napistu.constants import ONTOLOGIES
@@ -34,8 +32,9 @@ from napistu.constants import SBOTERM_NAMES
34
32
  from napistu.constants import SBO_ROLES_DEFS
35
33
  from napistu.constants import ENTITIES_W_DATA
36
34
  from napistu.constants import ENTITIES_TO_ENTITY_DATA
37
- from napistu.constants import CHARACTERISTIC_COMPLEX_ONTOLOGIES
38
- from napistu.ingestion import sbml
35
+ from napistu.ingestion.constants import GENERIC_COMPARTMENT
36
+ from napistu.ingestion.constants import COMPARTMENT_ALIASES
37
+ from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
39
38
  from fs import open_fs
40
39
 
41
40
  logger = logging.getLogger(__name__)
@@ -145,7 +144,7 @@ class SBML_dfs:
145
144
  if ent in sbml_model:
146
145
  setattr(self, ent, sbml_model[ent])
147
146
  else:
148
- self = sbml.sbml_df_from_sbml(self, sbml_model)
147
+ self = sbml.sbml_dfs_from_sbml(self, sbml_model)
149
148
 
150
149
  for ent in SBML_DFS_SCHEMA.OPTIONAL_ENTITIES:
151
150
  # Initialize optional entities if not set
@@ -1421,8 +1420,8 @@ def filter_to_characteristic_species_ids(
1421
1420
  complexes and non-characteristic annotations such as pubmed references and
1422
1421
  homologues.
1423
1422
 
1424
- Parameters
1425
- ----------
1423
+ Parameters
1424
+ ----------
1426
1425
  species_ids: pd.DataFrame
1427
1426
  A table of identifiers produced by sdbml_dfs.get_identifiers("species")
1428
1427
  max_complex_size: int
@@ -1471,12 +1470,6 @@ def filter_to_characteristic_species_ids(
1471
1470
  # add components within modestly sized protein complexes
1472
1471
  # look at HAS_PART IDs
1473
1472
  bqb_has_parts_species = species_ids[species_ids[IDENTIFIERS.BQB] == BQB.HAS_PART]
1474
- # filter to genes
1475
- bqb_has_parts_species = bqb_has_parts_species[
1476
- bqb_has_parts_species[IDENTIFIERS.ONTOLOGY].isin(
1477
- CHARACTERISTIC_COMPLEX_ONTOLOGIES
1478
- )
1479
- ]
1480
1473
 
1481
1474
  # number of species in a complex
1482
1475
  n_species_components = bqb_has_parts_species.value_counts(
@@ -1488,38 +1481,10 @@ def filter_to_characteristic_species_ids(
1488
1481
  ].index.get_level_values(SBML_DFS.S_ID)
1489
1482
  )
1490
1483
 
1491
- # number of complexes a species is part of
1492
- n_complexes_involvedin = bqb_has_parts_species.value_counts(
1493
- [IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
1494
- )
1495
- promiscuous_component_identifiers_index = n_complexes_involvedin[
1496
- n_complexes_involvedin > max_promiscuity
1497
- ].index
1498
- promiscuous_component_identifiers = pd.Series(
1499
- data=[True] * len(promiscuous_component_identifiers_index),
1500
- index=promiscuous_component_identifiers_index,
1501
- name="is_shared_component",
1502
- dtype=bool,
1484
+ filtered_bqb_has_parts = _filter_promiscuous_components(
1485
+ bqb_has_parts_species, max_promiscuity
1503
1486
  )
1504
1487
 
1505
- if len(promiscuous_component_identifiers) == 0:
1506
- # no complexes to filter
1507
- return species_ids
1508
-
1509
- filtered_bqb_has_parts = bqb_has_parts_species.merge(
1510
- promiscuous_component_identifiers,
1511
- left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
1512
- right_index=True,
1513
- how="left",
1514
- )
1515
-
1516
- filtered_bqb_has_parts["is_shared_component"] = filtered_bqb_has_parts[
1517
- "is_shared_component"
1518
- ].fillna(False)
1519
- # drop identifiers shared as components across many species
1520
- filtered_bqb_has_parts = filtered_bqb_has_parts[
1521
- ~filtered_bqb_has_parts["is_shared_component"]
1522
- ].drop(["is_shared_component"], axis=1)
1523
1488
  # drop species parts if there are many components
1524
1489
  filtered_bqb_has_parts = filtered_bqb_has_parts[
1525
1490
  ~filtered_bqb_has_parts[SBML_DFS.S_ID].isin(big_complex_sids)
@@ -1812,8 +1777,8 @@ def export_sbml_dfs(
1812
1777
  If True then treat genes, transcript, and proteins as separate species. If False
1813
1778
  then treat them interchangeably.
1814
1779
 
1815
- Returns
1816
- -------
1780
+ Returns
1781
+ -------
1817
1782
  None
1818
1783
 
1819
1784
  """
@@ -2257,7 +2222,7 @@ def _sbml_dfs_from_edgelist_check_cspecies_merge(
2257
2222
 
2258
2223
 
2259
2224
  def _stub_compartments(
2260
- stubbed_compartment: str = "CELLULAR_COMPONENT",
2225
+ stubbed_compartment: str = GENERIC_COMPARTMENT,
2261
2226
  ) -> pd.DataFrame:
2262
2227
  """Stub Compartments
2263
2228
 
@@ -2281,7 +2246,6 @@ def _stub_compartments(
2281
2246
  f"{stubbed_compartment} is not defined in constants.COMPARTMENTS_GO_TERMS"
2282
2247
  )
2283
2248
 
2284
- stubbed_compartment_name = COMPARTMENTS[stubbed_compartment]
2285
2249
  stubbed_compartment_id = COMPARTMENTS_GO_TERMS[stubbed_compartment]
2286
2250
 
2287
2251
  formatted_uri = identifiers.format_uri(
@@ -2294,7 +2258,7 @@ def _stub_compartments(
2294
2258
 
2295
2259
  compartments_df = pd.DataFrame(
2296
2260
  {
2297
- SBML_DFS.C_NAME: [stubbed_compartment_name],
2261
+ SBML_DFS.C_NAME: [stubbed_compartment],
2298
2262
  SBML_DFS.C_IDENTIFIERS: [identifiers.Identifiers([formatted_uri])],
2299
2263
  }
2300
2264
  )
@@ -2507,9 +2471,9 @@ def validate_sbml_dfs_table(table_data: pd.DataFrame, table_name: str) -> None:
2507
2471
  table_name : str
2508
2472
  Name of the table in the SBML_dfs schema
2509
2473
 
2510
- Raises
2511
- ------
2512
- ValueError
2474
+ Raises
2475
+ ------
2476
+ ValueError
2513
2477
  If table_name is not in schema or validation fails
2514
2478
  """
2515
2479
  if table_name not in SBML_DFS_SCHEMA.SCHEMA:
@@ -2533,8 +2497,8 @@ def _perform_sbml_dfs_table_validation(
2533
2497
  This function performs the actual validation checks for any table against its schema,
2534
2498
  regardless of whether it's part of an SBML_dfs object or standalone.
2535
2499
 
2536
- Parameters
2537
- ----------
2500
+ Parameters
2501
+ ----------
2538
2502
  table_data : pd.DataFrame
2539
2503
  The table data to validate
2540
2504
  table_schema : dict
@@ -2542,9 +2506,9 @@ def _perform_sbml_dfs_table_validation(
2542
2506
  table_name : str
2543
2507
  Name of the table (for error messages)
2544
2508
 
2545
- Raises
2546
- ------
2547
- ValueError
2509
+ Raises
2510
+ ------
2511
+ ValueError
2548
2512
  If the table does not conform to its schema:
2549
2513
  - Not a DataFrame
2550
2514
  - Wrong index name
@@ -2596,3 +2560,42 @@ def _perform_sbml_dfs_table_validation(
2596
2560
  # check for empty table
2597
2561
  if table_data.shape[0] == 0:
2598
2562
  raise ValueError(f"{table_name} contained no entries")
2563
+
2564
+
2565
+ def _filter_promiscuous_components(
2566
+ bqb_has_parts_species: pd.DataFrame, max_promiscuity: int
2567
+ ) -> pd.DataFrame:
2568
+
2569
+ # number of complexes a species is part of
2570
+ n_complexes_involvedin = bqb_has_parts_species.value_counts(
2571
+ [IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
2572
+ )
2573
+ promiscuous_component_identifiers_index = n_complexes_involvedin[
2574
+ n_complexes_involvedin > max_promiscuity
2575
+ ].index
2576
+ promiscuous_component_identifiers = pd.Series(
2577
+ data=[True] * len(promiscuous_component_identifiers_index),
2578
+ index=promiscuous_component_identifiers_index,
2579
+ name="is_shared_component",
2580
+ dtype=bool,
2581
+ )
2582
+
2583
+ if len(promiscuous_component_identifiers) == 0:
2584
+ return bqb_has_parts_species
2585
+
2586
+ filtered_bqb_has_parts = bqb_has_parts_species.merge(
2587
+ promiscuous_component_identifiers,
2588
+ left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
2589
+ right_index=True,
2590
+ how="left",
2591
+ )
2592
+
2593
+ filtered_bqb_has_parts["is_shared_component"] = (
2594
+ filtered_bqb_has_parts["is_shared_component"].astype("boolean").fillna(False)
2595
+ )
2596
+ # drop identifiers shared as components across many species
2597
+ filtered_bqb_has_parts = filtered_bqb_has_parts[
2598
+ ~filtered_bqb_has_parts["is_shared_component"]
2599
+ ].drop(["is_shared_component"], axis=1)
2600
+
2601
+ return filtered_bqb_has_parts
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.3.2.dev1
3
+ Version: 0.3.4
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -19,7 +19,7 @@ Requires-Dist: Jinja2
19
19
  Requires-Dist: PyYAML<7.0.0,>=6.0.0
20
20
  Requires-Dist: click<9.0.0,>=8.0.0
21
21
  Requires-Dist: click-logging
22
- Requires-Dist: fs<3.0.0,>=2.4.0
22
+ Requires-Dist: fs<3.0.0,>=2.4.16
23
23
  Requires-Dist: fs-gcsfs<2.0.0,>=1.5.0
24
24
  Requires-Dist: igraph
25
25
  Requires-Dist: matplotlib<4.0.0,>=3.5.0
@@ -1,10 +1,10 @@
1
1
  napistu/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
2
2
  napistu/__main__.py,sha256=PbzIsFAoFHNQuSyi-ql-D7tQLEOuqgmTcgk0PY-OGeU,28636
3
3
  napistu/consensus.py,sha256=UbKKSLP1O46e3Rk8d_aqNlhRHeR3sZRztAgIm7-XK6Y,69960
4
- napistu/constants.py,sha256=Wb1eir8DKSAjukNPknNkKTS4kQSqZjTcTjFaOYNyrC8,15317
4
+ napistu/constants.py,sha256=10SuKl19koWvCaeM2k-5GDQxvJpKY3GaGFY_4VZivXw,12356
5
5
  napistu/identifiers.py,sha256=wsVriQdvPllA5uvh5CiREklA2tYW2MIB14dV7CPaMVU,34003
6
6
  napistu/indices.py,sha256=E_djN1XWc6l1lrFw_QnQXfZTKYTaUv8-jFPP7cHkY5A,9780
7
- napistu/sbml_dfs_core.py,sha256=5t2TYQeuvZeBqR3_eX0oEfAVHyaWSYMLkEO7zmSpo4Y,91779
7
+ napistu/sbml_dfs_core.py,sha256=w4hoggMAXJ4Np45_2j-l92vOZg134eYa5Sg7SIdCaFo,91804
8
8
  napistu/sbml_dfs_utils.py,sha256=LJo6WWTrmnE58ZLDuibeeHk88uCdfunWdja7XxdZpps,11525
9
9
  napistu/source.py,sha256=9uUJrkY4jHaKlzz5nNcQQ8wUAep2pfqhlHxHw1hmEkI,13648
10
10
  napistu/utils.py,sha256=TcholWrFbRSu_sn9ODMA8y2YyAhekEKZjwf4S0WQNzI,33241
@@ -17,7 +17,7 @@ napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
17
17
  napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
18
18
  napistu/ingestion/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
19
19
  napistu/ingestion/bigg.py,sha256=q0HeVSO6pFftbrxxVfFGUtMvCoak9Wi9ngMggRfjFjo,4364
20
- napistu/ingestion/constants.py,sha256=JmIefW8qektzpJr5t0GBtcAq-4bVLougTrOAegm5bl8,5410
20
+ napistu/ingestion/constants.py,sha256=9UP47VImZ11q0kz17N3EJg2155USqLewwNWyKpA-cbA,8089
21
21
  napistu/ingestion/gtex.py,sha256=X0hSC1yrpf4xSJWFhpeNcnHwJzKDII2MvjfUqYA0JN8,3720
22
22
  napistu/ingestion/hpa.py,sha256=R27ExrryKQ4Crxv9ATXmBJCa-yd01TMOrDjkeBhIQac,5054
23
23
  napistu/ingestion/identifiers_etl.py,sha256=6ppDUA6lEZurdmVbiFLOUzphYbr-hndMhtqsQnq_yAc,5009
@@ -25,8 +25,8 @@ napistu/ingestion/napistu_edgelist.py,sha256=eVT9M7gmdBuGHcAYlvkD_zzvTtyzXufKWjw
25
25
  napistu/ingestion/obo.py,sha256=AQkIPWbjA464Lma0tx91JucWkIwLjC7Jgv5VHGRTDkE,9601
26
26
  napistu/ingestion/psi_mi.py,sha256=5eJjm7XWogL9oTyGqR52kntHClLwLsTePKqCvUGyi-w,10111
27
27
  napistu/ingestion/reactome.py,sha256=Hn9X-vDp4o_HK-OtaQvel3vJeZ8_TC1-4N2rruK9Oks,7099
28
- napistu/ingestion/sbml.py,sha256=muLTo-LTeL9lvvdEOjnSg82PQM2jpRExkK0UyF1qea8,20262
29
- napistu/ingestion/string.py,sha256=8igOC3j8cPdG_pMl14Z2OZsbvHUldPTJOJOUD7IJ-Mc,11676
28
+ napistu/ingestion/sbml.py,sha256=N7neMwjTEF7OMhAcNvQJ29V_d3PqMLjLOZqvJTlK9q0,24743
29
+ napistu/ingestion/string.py,sha256=YSWqaKm3I8bOixzvSA8fU4yfR2izddPYs4qJiqwjbxk,11678
30
30
  napistu/ingestion/trrust.py,sha256=ccjZc_eF3PdxxurnukiEo_e0-aKc_3z22NYbaJBtHdY,9774
31
31
  napistu/ingestion/yeast.py,sha256=bwFBNxRq-dLDaddgBL1hpfZj0eQ56nBXyR_9n0NZT9Y,5233
32
32
  napistu/matching/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
@@ -54,7 +54,7 @@ napistu/mcp/utils.py,sha256=WB4c6s8aPZLgi_Wvhhq0DE8Cnz2QGff0V8hrF1feVRg,1296
54
54
  napistu/modify/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
55
55
  napistu/modify/constants.py,sha256=H6K6twzPlxt0yp6QLAxIx0Tp8YzYhtKKXPdmXi5V_QQ,3689
56
56
  napistu/modify/curation.py,sha256=sQeSO53ZLdn14ww2GSKkoP0vJnDpAoSWb-YDjUf5hDQ,21743
57
- napistu/modify/gaps.py,sha256=nhoo30iQai16NzYYfHQ4PE42psWp5hA20Oq7yae1pzw,26743
57
+ napistu/modify/gaps.py,sha256=qprylC2BbSk_vPWayYPVT8lwURXDMOlW5zNLV_wMFZ4,26755
58
58
  napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
59
59
  napistu/modify/uncompartmentalize.py,sha256=U5X4Q7Z-YIkC8_711x3sU21vTVdv9rKfauwz4JNzl6c,9690
60
60
  napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
@@ -67,7 +67,7 @@ napistu/network/net_create.py,sha256=2N5ocGmibdBxIUVtv3H36iFWwkbys9ECCERFRlByhLc
67
67
  napistu/network/net_propagation.py,sha256=89ZR4p2mGpkCCIemofZ53XbUjQsuNABxIc6UmF8A5n8,4935
68
68
  napistu/network/ng_utils.py,sha256=ijWDa5MTuULJpdV6dcVFGmLmtB_xy87jaUG7F5nvC_k,15240
69
69
  napistu/network/paths.py,sha256=S4ZaV0yVmI-o8sXfom5eXA3yy2IEbleYUyXEvnmVw98,17468
70
- napistu/network/precompute.py,sha256=xDIHWxGWwDyEw1sF1bQKHVbunI8qmeJvo3Iv7wADUys,8960
70
+ napistu/network/precompute.py,sha256=_TyztdHucczZg1JacWuXfNp5NGRKBFMGfp8Imx7OBMM,9118
71
71
  napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
72
72
  napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
73
73
  napistu/ontologies/dogma.py,sha256=jGZS-J3d29AoUOow-HVjfVZQJ87lnqO5L1aozieN1ec,8825
@@ -81,7 +81,7 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
81
81
  napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
82
82
  napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
83
83
  napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
84
- napistu-0.3.2.dev1.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
84
+ napistu-0.3.4.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
85
85
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
86
  tests/conftest.py,sha256=XVkd0tQywhnf2cgab7fIjBo3NlaTVX3cO8HaRS2jIwM,3190
87
87
  tests/test_consensus.py,sha256=3dJvvPsPG7bHbw_FY4Pm647N_Gt_Ud9157OKYfPCUd4,9502
@@ -108,16 +108,16 @@ tests/test_network_net_create.py,sha256=VNFZTwQawAZQPDnVk_qFevgZErx5KyQZ24bMoZF4
108
108
  tests/test_network_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
109
109
  tests/test_network_ng_utils.py,sha256=CwDw4MKTPhVZXz2HA2XU2QjjBv8CXc1_yQ0drvkBkFw,724
110
110
  tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
111
- tests/test_network_precompute.py,sha256=W1tuHM-dd90nk0vUUNP_xZ7EhCKSjigI5ndm8oq8l0c,8869
111
+ tests/test_network_precompute.py,sha256=xMGmZI9DxcWhJxuP7GCZEqtmcOvDRNK2LSia0x94v0U,9018
112
112
  tests/test_ontologies_genodexito.py,sha256=hBlunyEPiKskqagjWKW5Z6DJwKvpueYHJLwbfyeeAdo,2256
113
113
  tests/test_ontologies_mygene.py,sha256=BuBLm8VatzpK39-Ew_fFTK9ueLE4eqmKIDS5UKE59n8,1541
114
114
  tests/test_ontologies_renaming.py,sha256=k7bQzP24zG7W3fpULwk1me2sOWEWlxylr4Mhx1_gJJY,3740
115
115
  tests/test_pathwayannot.py,sha256=bceosccNy9tgxQei_7j7ATBSSvBSxOngJvK-mAzR_K0,3312
116
116
  tests/test_rpy2_callr.py,sha256=UVzXMvYN3wcc-ikDIjH2sA4BqkbwiNbMm561BcbnbD4,2936
117
117
  tests/test_rpy2_init.py,sha256=APrNt9GEQV9va3vU5k250TxFplAoWFc-FJRFhM2GcDk,5927
118
- tests/test_sbml.py,sha256=-Lxr_Iu7zl15c98NLG-ty9d1V9lLYHsieKbU_s96F6s,471
119
- tests/test_sbml_dfs_core.py,sha256=z2dYl5-3ZvIsEeK_sHxm8VtWSdL81Eljz_9aeedlM6U,16806
120
- tests/test_sbml_dfs_utils.py,sha256=onFWdhrTix30XR1-CMrMXld37BYxEGi6TZrweugLDzI,505
118
+ tests/test_sbml.py,sha256=f25zj1NogYrmLluvBDboLameTuCiQ309433Qn3iPvhg,1483
119
+ tests/test_sbml_dfs_core.py,sha256=tFaLMMuVjTLuhL-wimvcBbodEp59dhyHvXZ-IlUGGeU,19222
120
+ tests/test_sbml_dfs_utils.py,sha256=5lNzZ1NLOnFb_sZ0YWTgLzXy28yGNCtS_H8Q-W-T6Bw,2022
121
121
  tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
122
122
  tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
123
123
  tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
@@ -126,8 +126,8 @@ tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRa
126
126
  tests/test_utils.py,sha256=JRJFmjDNZpjG59a-73JkTyGqa_a7Z8d0fE2cZt0CRII,22580
127
127
  tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
128
128
  tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
- napistu-0.3.2.dev1.dist-info/METADATA,sha256=go09DE0iiD9UQmhyGSTdq2iGLGgODt84uZ9vKbridIg,3418
130
- napistu-0.3.2.dev1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
131
- napistu-0.3.2.dev1.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
132
- napistu-0.3.2.dev1.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
133
- napistu-0.3.2.dev1.dist-info/RECORD,,
129
+ napistu-0.3.4.dist-info/METADATA,sha256=u0M0PcjZJds7ds-cV2MYar8maae9_ld_9FO--0G3xwA,3414
130
+ napistu-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
131
+ napistu-0.3.4.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
132
+ napistu-0.3.4.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
133
+ napistu-0.3.4.dist-info/RECORD,,
@@ -179,8 +179,11 @@ def test_precomputed_distances_neighborhoods():
179
179
  right_on=join_key,
180
180
  how="outer",
181
181
  )
182
- .fillna(False)
183
182
  )
183
+ for col in ["in_precompute", "in_otf"]:
184
+ neighbor_comparison[col] = (
185
+ neighbor_comparison[col].astype("boolean").fillna(False)
186
+ )
184
187
  comparison_l.append(neighbor_comparison.assign(focal_sc_id=key))
185
188
 
186
189
  comparison_df = pd.concat(comparison_l)
tests/test_sbml.py CHANGED
@@ -1,17 +1,48 @@
1
1
  from __future__ import annotations
2
2
 
3
- import pandas as pd
4
3
  from napistu import sbml_dfs_core
5
4
  from napistu.ingestion import sbml
5
+ import pytest
6
+ from pydantic import ValidationError
6
7
 
7
8
 
8
9
  def test_sbml_dfs(sbml_path):
9
10
  sbml_model = sbml.SBML(sbml_path)
10
- _ = sbml_model.model
11
+ _ = sbml_dfs_core.SBML_dfs(sbml_model)
11
12
 
12
- dfs = sbml_dfs_core.SBML_dfs(sbml_model)
13
- dfs.validate()
14
13
 
15
- assert type(dfs.get_cspecies_features()) is pd.DataFrame
16
- assert type(dfs.get_species_features()) is pd.DataFrame
17
- assert type(dfs.get_identifiers("species")) is pd.DataFrame
14
+ def test_compartment_aliases_validation_positive():
15
+ """
16
+ Tests that a valid compartment aliases dictionary passes validation.
17
+ """
18
+ valid_aliases = {
19
+ "extracellular": ["ECM", "extracellular space"],
20
+ "cytosol": ["cytoplasm"],
21
+ }
22
+ # This should not raise an exception
23
+ sbml.CompartmentAliasesValidator.model_validate(valid_aliases)
24
+
25
+
26
+ def test_compartment_aliases_validation_negative():
27
+ """
28
+ Tests that an invalid compartment aliases dictionary raises a ValidationError.
29
+ """
30
+ invalid_aliases = {
31
+ "extracellular": ["ECM"],
32
+ "not_a_real_compartment": ["fake"],
33
+ }
34
+ with pytest.raises(ValidationError):
35
+ sbml.CompartmentAliasesValidator.model_validate(invalid_aliases)
36
+
37
+
38
+ def test_compartment_aliases_validation_bad_type():
39
+ """
40
+ Tests that a validation error is raised for incorrect data types.
41
+ """
42
+ # Test with a non-dict input
43
+ with pytest.raises(ValidationError):
44
+ sbml.CompartmentAliasesValidator.model_validate(["extracellular"])
45
+
46
+ # Test with incorrect value types in the dictionary
47
+ with pytest.raises(ValidationError):
48
+ sbml.CompartmentAliasesValidator.model_validate({"extracellular": "ECM"})
@@ -10,7 +10,14 @@ from napistu.ingestion import sbml
10
10
  from napistu.modify import pathwayannot
11
11
 
12
12
  from napistu import identifiers as napistu_identifiers
13
- from napistu.constants import SBML_DFS, SBOTERM_NAMES
13
+ from napistu.constants import (
14
+ SBML_DFS,
15
+ SBOTERM_NAMES,
16
+ BQB_DEFINING_ATTRS,
17
+ BQB_DEFINING_ATTRS_LOOSE,
18
+ BQB,
19
+ IDENTIFIERS,
20
+ )
14
21
  from napistu.sbml_dfs_core import SBML_dfs
15
22
 
16
23
 
@@ -493,3 +500,84 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
493
500
 
494
501
  # Validate the model is still valid
495
502
  sbml_dfs_w_data.validate()
503
+
504
+
505
+ def test_filter_to_characteristic_species_ids():
506
+
507
+ species_ids_dict = {
508
+ SBML_DFS.S_ID: ["large_complex"] * 6
509
+ + ["small_complex"] * 2
510
+ + ["proteinA", "proteinB"]
511
+ + ["proteinC"] * 3
512
+ + [
513
+ "promiscuous_complexA",
514
+ "promiscuous_complexB",
515
+ "promiscuous_complexC",
516
+ "promiscuous_complexD",
517
+ "promiscuous_complexE",
518
+ ],
519
+ IDENTIFIERS.ONTOLOGY: ["complexportal"]
520
+ + ["HGNC"] * 7
521
+ + ["GO"] * 2
522
+ + ["ENSG", "ENSP", "pubmed"]
523
+ + ["HGNC"] * 5,
524
+ IDENTIFIERS.IDENTIFIER: [
525
+ "CPX-BIG",
526
+ "mem1",
527
+ "mem2",
528
+ "mem3",
529
+ "mem4",
530
+ "mem5",
531
+ "part1",
532
+ "part2",
533
+ "GO:1",
534
+ "GO:2",
535
+ "dna_seq",
536
+ "protein_seq",
537
+ "my_cool_pub",
538
+ ]
539
+ + ["promiscuous_complex"] * 5,
540
+ IDENTIFIERS.BQB: [BQB.IS]
541
+ + [BQB.HAS_PART] * 7
542
+ + [BQB.IS] * 2
543
+ + [
544
+ # these are retained if BQB_DEFINING_ATTRS_LOOSE is used
545
+ BQB.ENCODES,
546
+ BQB.IS_ENCODED_BY,
547
+ # this should always be removed
548
+ BQB.IS_DESCRIBED_BY,
549
+ ]
550
+ + [BQB.HAS_PART] * 5,
551
+ }
552
+
553
+ species_ids = pd.DataFrame(species_ids_dict)
554
+
555
+ characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
556
+ species_ids,
557
+ defining_biological_qualifiers=BQB_DEFINING_ATTRS,
558
+ max_complex_size=4,
559
+ max_promiscuity=4,
560
+ )
561
+
562
+ EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
563
+ assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
564
+
565
+ characteristic_ids_loose = sbml_dfs_core.filter_to_characteristic_species_ids(
566
+ species_ids,
567
+ # include encodes and is_encoded_by as equivalent to is
568
+ defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
569
+ max_complex_size=4,
570
+ # expand promiscuity to default value
571
+ max_promiscuity=20,
572
+ )
573
+
574
+ EXPECTED_IDS = [
575
+ "CPX-BIG",
576
+ "GO:1",
577
+ "GO:2",
578
+ "dna_seq",
579
+ "protein_seq",
580
+ "part1",
581
+ "part2",
582
+ ] + ["promiscuous_complex"] * 5
583
+ assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
@@ -1,6 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import pandas as pd
4
+
3
5
  from napistu import sbml_dfs_utils
6
+ from napistu.constants import BQB, BQB_DEFINING_ATTRS, BQB_DEFINING_ATTRS_LOOSE
4
7
 
5
8
 
6
9
  def test_id_formatter():
@@ -14,9 +17,47 @@ def test_id_formatter():
14
17
  assert list(input_vals) == inv_ids
15
18
 
16
19
 
17
- ################################################
18
- # __main__
19
- ################################################
20
-
21
- if __name__ == "__main__":
22
- test_id_formatter()
20
+ def test_get_characteristic_species_ids():
21
+ """
22
+ Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
23
+ """
24
+ # Create mock species identifiers data
25
+ mock_species_ids = pd.DataFrame(
26
+ {
27
+ "s_id": ["s1", "s2", "s3", "s4", "s5"],
28
+ "identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
29
+ "ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
30
+ "bqb": [
31
+ "BQB_IS",
32
+ "BQB_IS",
33
+ "BQB_HAS_PART",
34
+ "BQB_HAS_VERSION",
35
+ "BQB_ENCODES",
36
+ ],
37
+ }
38
+ )
39
+
40
+ # Create mock SBML_dfs object
41
+ class MockSBML_dfs:
42
+ def get_identifiers(self, entity_type):
43
+ return mock_species_ids
44
+
45
+ mock_sbml = MockSBML_dfs()
46
+
47
+ # Test dogmatic case (default)
48
+ expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART] # noqa: F841
49
+ dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
50
+ expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
51
+
52
+ pd.testing.assert_frame_equal(dogmatic_result, expected_dogmatic, check_like=True)
53
+
54
+ # Test non-dogmatic case
55
+ expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART] # noqa: F841
56
+ non_dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(
57
+ mock_sbml, dogmatic=False
58
+ )
59
+ expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
60
+
61
+ pd.testing.assert_frame_equal(
62
+ non_dogmatic_result, expected_non_dogmatic, check_like=True
63
+ )