napistu 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
napistu/constants.py CHANGED
@@ -403,12 +403,6 @@ ONTOLOGIES = SimpleNamespace(
403
403
 
404
404
  ONTOLOGIES_LIST = list(ONTOLOGIES.__dict__.values())
405
405
 
406
- CHARACTERISTIC_COMPLEX_ONTOLOGIES = [
407
- ONTOLOGIES.ENSEMBL_GENE,
408
- ONTOLOGIES.NCBI_ENTREZ_GENE,
409
- ONTOLOGIES.MIRBASE,
410
- ]
411
-
412
406
  ONTOLOGY_SPECIES_ALIASES = {
413
407
  ONTOLOGIES.NCBI_ENTREZ_GENE: {"ncbigene", "ncbi_gene"},
414
408
  ONTOLOGIES.ENSEMBL_GENE: {"ensembl_gene_id"},
@@ -120,7 +120,7 @@ def save_precomputed_distances(
120
120
  OSError
121
121
  If the file cannot be written to (permission issues, etc.)
122
122
  """
123
- save_json(str(uri), precomputed_distances.to_dict(orient="index"))
123
+ save_json(str(uri), precomputed_distances.to_json())
124
124
 
125
125
 
126
126
  def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
@@ -143,12 +143,18 @@ def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
143
143
  If the specified file does not exist
144
144
  """
145
145
  try:
146
- data_dict = load_json(str(uri))
146
+ json_string = load_json(str(uri))
147
+ df = pd.read_json(json_string)
148
+
149
+ # Convert integer columns to float
150
+ for col in df.columns:
151
+ if df[col].dtype in ["int64", "int32", "int16", "int8"]:
152
+ df[col] = df[col].astype(float)
153
+
154
+ return df
147
155
  except ResourceNotFound as e:
148
156
  raise FileNotFoundError(f"File not found: {uri}") from e
149
157
 
150
- return pd.DataFrame.from_dict(data_dict, orient="index").rename(index=int)
151
-
152
158
 
153
159
  def _calculate_distances_subset(
154
160
  napistu_graph: NapistuGraph,
napistu/sbml_dfs_core.py CHANGED
@@ -32,7 +32,6 @@ from napistu.constants import SBOTERM_NAMES
32
32
  from napistu.constants import SBO_ROLES_DEFS
33
33
  from napistu.constants import ENTITIES_W_DATA
34
34
  from napistu.constants import ENTITIES_TO_ENTITY_DATA
35
- from napistu.constants import CHARACTERISTIC_COMPLEX_ONTOLOGIES
36
35
  from napistu.ingestion.constants import GENERIC_COMPARTMENT
37
36
  from napistu.ingestion.constants import COMPARTMENT_ALIASES
38
37
  from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
@@ -1471,12 +1470,6 @@ def filter_to_characteristic_species_ids(
1471
1470
  # add components within modestly sized protein complexes
1472
1471
  # look at HAS_PART IDs
1473
1472
  bqb_has_parts_species = species_ids[species_ids[IDENTIFIERS.BQB] == BQB.HAS_PART]
1474
- # filter to genes
1475
- bqb_has_parts_species = bqb_has_parts_species[
1476
- bqb_has_parts_species[IDENTIFIERS.ONTOLOGY].isin(
1477
- CHARACTERISTIC_COMPLEX_ONTOLOGIES
1478
- )
1479
- ]
1480
1473
 
1481
1474
  # number of species in a complex
1482
1475
  n_species_components = bqb_has_parts_species.value_counts(
@@ -1488,38 +1481,10 @@ def filter_to_characteristic_species_ids(
1488
1481
  ].index.get_level_values(SBML_DFS.S_ID)
1489
1482
  )
1490
1483
 
1491
- # number of complexes a species is part of
1492
- n_complexes_involvedin = bqb_has_parts_species.value_counts(
1493
- [IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
1494
- )
1495
- promiscuous_component_identifiers_index = n_complexes_involvedin[
1496
- n_complexes_involvedin > max_promiscuity
1497
- ].index
1498
- promiscuous_component_identifiers = pd.Series(
1499
- data=[True] * len(promiscuous_component_identifiers_index),
1500
- index=promiscuous_component_identifiers_index,
1501
- name="is_shared_component",
1502
- dtype=bool,
1503
- )
1504
-
1505
- if len(promiscuous_component_identifiers) == 0:
1506
- # no complexes to filter
1507
- return species_ids
1508
-
1509
- filtered_bqb_has_parts = bqb_has_parts_species.merge(
1510
- promiscuous_component_identifiers,
1511
- left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
1512
- right_index=True,
1513
- how="left",
1484
+ filtered_bqb_has_parts = _filter_promiscuous_components(
1485
+ bqb_has_parts_species, max_promiscuity
1514
1486
  )
1515
1487
 
1516
- filtered_bqb_has_parts["is_shared_component"] = filtered_bqb_has_parts[
1517
- "is_shared_component"
1518
- ].fillna(False)
1519
- # drop identifiers shared as components across many species
1520
- filtered_bqb_has_parts = filtered_bqb_has_parts[
1521
- ~filtered_bqb_has_parts["is_shared_component"]
1522
- ].drop(["is_shared_component"], axis=1)
1523
1488
  # drop species parts if there are many components
1524
1489
  filtered_bqb_has_parts = filtered_bqb_has_parts[
1525
1490
  ~filtered_bqb_has_parts[SBML_DFS.S_ID].isin(big_complex_sids)
@@ -2595,3 +2560,42 @@ def _perform_sbml_dfs_table_validation(
2595
2560
  # check for empty table
2596
2561
  if table_data.shape[0] == 0:
2597
2562
  raise ValueError(f"{table_name} contained no entries")
2563
+
2564
+
2565
+ def _filter_promiscuous_components(
2566
+ bqb_has_parts_species: pd.DataFrame, max_promiscuity: int
2567
+ ) -> pd.DataFrame:
2568
+
2569
+ # number of complexes a species is part of
2570
+ n_complexes_involvedin = bqb_has_parts_species.value_counts(
2571
+ [IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
2572
+ )
2573
+ promiscuous_component_identifiers_index = n_complexes_involvedin[
2574
+ n_complexes_involvedin > max_promiscuity
2575
+ ].index
2576
+ promiscuous_component_identifiers = pd.Series(
2577
+ data=[True] * len(promiscuous_component_identifiers_index),
2578
+ index=promiscuous_component_identifiers_index,
2579
+ name="is_shared_component",
2580
+ dtype=bool,
2581
+ )
2582
+
2583
+ if len(promiscuous_component_identifiers) == 0:
2584
+ return bqb_has_parts_species
2585
+
2586
+ filtered_bqb_has_parts = bqb_has_parts_species.merge(
2587
+ promiscuous_component_identifiers,
2588
+ left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
2589
+ right_index=True,
2590
+ how="left",
2591
+ )
2592
+
2593
+ filtered_bqb_has_parts["is_shared_component"] = (
2594
+ filtered_bqb_has_parts["is_shared_component"].astype("boolean").fillna(False)
2595
+ )
2596
+ # drop identifiers shared as components across many species
2597
+ filtered_bqb_has_parts = filtered_bqb_has_parts[
2598
+ ~filtered_bqb_has_parts["is_shared_component"]
2599
+ ].drop(["is_shared_component"], axis=1)
2600
+
2601
+ return filtered_bqb_has_parts
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -19,7 +19,7 @@ Requires-Dist: Jinja2
19
19
  Requires-Dist: PyYAML<7.0.0,>=6.0.0
20
20
  Requires-Dist: click<9.0.0,>=8.0.0
21
21
  Requires-Dist: click-logging
22
- Requires-Dist: fs<3.0.0,>=2.4.0
22
+ Requires-Dist: fs<3.0.0,>=2.4.16
23
23
  Requires-Dist: fs-gcsfs<2.0.0,>=1.5.0
24
24
  Requires-Dist: igraph
25
25
  Requires-Dist: matplotlib<4.0.0,>=3.5.0
@@ -1,10 +1,10 @@
1
1
  napistu/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
2
2
  napistu/__main__.py,sha256=PbzIsFAoFHNQuSyi-ql-D7tQLEOuqgmTcgk0PY-OGeU,28636
3
3
  napistu/consensus.py,sha256=UbKKSLP1O46e3Rk8d_aqNlhRHeR3sZRztAgIm7-XK6Y,69960
4
- napistu/constants.py,sha256=M-zGc85bo1DDRe7uvyeEMUoD9Qf59Qz53nx4R6PwHvk,12483
4
+ napistu/constants.py,sha256=10SuKl19koWvCaeM2k-5GDQxvJpKY3GaGFY_4VZivXw,12356
5
5
  napistu/identifiers.py,sha256=wsVriQdvPllA5uvh5CiREklA2tYW2MIB14dV7CPaMVU,34003
6
6
  napistu/indices.py,sha256=E_djN1XWc6l1lrFw_QnQXfZTKYTaUv8-jFPP7cHkY5A,9780
7
- napistu/sbml_dfs_core.py,sha256=TUPp2nzaxRWkHKsXrbFzgp0_p5CzEGAmA5cg3dOUm84,91794
7
+ napistu/sbml_dfs_core.py,sha256=w4hoggMAXJ4Np45_2j-l92vOZg134eYa5Sg7SIdCaFo,91804
8
8
  napistu/sbml_dfs_utils.py,sha256=LJo6WWTrmnE58ZLDuibeeHk88uCdfunWdja7XxdZpps,11525
9
9
  napistu/source.py,sha256=9uUJrkY4jHaKlzz5nNcQQ8wUAep2pfqhlHxHw1hmEkI,13648
10
10
  napistu/utils.py,sha256=TcholWrFbRSu_sn9ODMA8y2YyAhekEKZjwf4S0WQNzI,33241
@@ -67,7 +67,7 @@ napistu/network/net_create.py,sha256=2N5ocGmibdBxIUVtv3H36iFWwkbys9ECCERFRlByhLc
67
67
  napistu/network/net_propagation.py,sha256=89ZR4p2mGpkCCIemofZ53XbUjQsuNABxIc6UmF8A5n8,4935
68
68
  napistu/network/ng_utils.py,sha256=ijWDa5MTuULJpdV6dcVFGmLmtB_xy87jaUG7F5nvC_k,15240
69
69
  napistu/network/paths.py,sha256=S4ZaV0yVmI-o8sXfom5eXA3yy2IEbleYUyXEvnmVw98,17468
70
- napistu/network/precompute.py,sha256=xDIHWxGWwDyEw1sF1bQKHVbunI8qmeJvo3Iv7wADUys,8960
70
+ napistu/network/precompute.py,sha256=_TyztdHucczZg1JacWuXfNp5NGRKBFMGfp8Imx7OBMM,9118
71
71
  napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
72
72
  napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
73
73
  napistu/ontologies/dogma.py,sha256=jGZS-J3d29AoUOow-HVjfVZQJ87lnqO5L1aozieN1ec,8825
@@ -81,7 +81,7 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
81
81
  napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
82
82
  napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
83
83
  napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
84
- napistu-0.3.3.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
84
+ napistu-0.3.4.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
85
85
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
86
  tests/conftest.py,sha256=XVkd0tQywhnf2cgab7fIjBo3NlaTVX3cO8HaRS2jIwM,3190
87
87
  tests/test_consensus.py,sha256=3dJvvPsPG7bHbw_FY4Pm647N_Gt_Ud9157OKYfPCUd4,9502
@@ -108,7 +108,7 @@ tests/test_network_net_create.py,sha256=VNFZTwQawAZQPDnVk_qFevgZErx5KyQZ24bMoZF4
108
108
  tests/test_network_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
109
109
  tests/test_network_ng_utils.py,sha256=CwDw4MKTPhVZXz2HA2XU2QjjBv8CXc1_yQ0drvkBkFw,724
110
110
  tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
111
- tests/test_network_precompute.py,sha256=W1tuHM-dd90nk0vUUNP_xZ7EhCKSjigI5ndm8oq8l0c,8869
111
+ tests/test_network_precompute.py,sha256=xMGmZI9DxcWhJxuP7GCZEqtmcOvDRNK2LSia0x94v0U,9018
112
112
  tests/test_ontologies_genodexito.py,sha256=hBlunyEPiKskqagjWKW5Z6DJwKvpueYHJLwbfyeeAdo,2256
113
113
  tests/test_ontologies_mygene.py,sha256=BuBLm8VatzpK39-Ew_fFTK9ueLE4eqmKIDS5UKE59n8,1541
114
114
  tests/test_ontologies_renaming.py,sha256=k7bQzP24zG7W3fpULwk1me2sOWEWlxylr4Mhx1_gJJY,3740
@@ -116,8 +116,8 @@ tests/test_pathwayannot.py,sha256=bceosccNy9tgxQei_7j7ATBSSvBSxOngJvK-mAzR_K0,33
116
116
  tests/test_rpy2_callr.py,sha256=UVzXMvYN3wcc-ikDIjH2sA4BqkbwiNbMm561BcbnbD4,2936
117
117
  tests/test_rpy2_init.py,sha256=APrNt9GEQV9va3vU5k250TxFplAoWFc-FJRFhM2GcDk,5927
118
118
  tests/test_sbml.py,sha256=f25zj1NogYrmLluvBDboLameTuCiQ309433Qn3iPvhg,1483
119
- tests/test_sbml_dfs_core.py,sha256=z2dYl5-3ZvIsEeK_sHxm8VtWSdL81Eljz_9aeedlM6U,16806
120
- tests/test_sbml_dfs_utils.py,sha256=onFWdhrTix30XR1-CMrMXld37BYxEGi6TZrweugLDzI,505
119
+ tests/test_sbml_dfs_core.py,sha256=tFaLMMuVjTLuhL-wimvcBbodEp59dhyHvXZ-IlUGGeU,19222
120
+ tests/test_sbml_dfs_utils.py,sha256=5lNzZ1NLOnFb_sZ0YWTgLzXy28yGNCtS_H8Q-W-T6Bw,2022
121
121
  tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
122
122
  tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
123
123
  tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
@@ -126,8 +126,8 @@ tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRa
126
126
  tests/test_utils.py,sha256=JRJFmjDNZpjG59a-73JkTyGqa_a7Z8d0fE2cZt0CRII,22580
127
127
  tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
128
128
  tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
- napistu-0.3.3.dist-info/METADATA,sha256=Y5HCnBOuVO0Rvc1S-P3llVgWJSeBR1hes6UTt0cyhB4,3413
130
- napistu-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
131
- napistu-0.3.3.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
132
- napistu-0.3.3.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
133
- napistu-0.3.3.dist-info/RECORD,,
129
+ napistu-0.3.4.dist-info/METADATA,sha256=u0M0PcjZJds7ds-cV2MYar8maae9_ld_9FO--0G3xwA,3414
130
+ napistu-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
131
+ napistu-0.3.4.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
132
+ napistu-0.3.4.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
133
+ napistu-0.3.4.dist-info/RECORD,,
@@ -179,8 +179,11 @@ def test_precomputed_distances_neighborhoods():
179
179
  right_on=join_key,
180
180
  how="outer",
181
181
  )
182
- .fillna(False)
183
182
  )
183
+ for col in ["in_precompute", "in_otf"]:
184
+ neighbor_comparison[col] = (
185
+ neighbor_comparison[col].astype("boolean").fillna(False)
186
+ )
184
187
  comparison_l.append(neighbor_comparison.assign(focal_sc_id=key))
185
188
 
186
189
  comparison_df = pd.concat(comparison_l)
@@ -10,7 +10,14 @@ from napistu.ingestion import sbml
10
10
  from napistu.modify import pathwayannot
11
11
 
12
12
  from napistu import identifiers as napistu_identifiers
13
- from napistu.constants import SBML_DFS, SBOTERM_NAMES
13
+ from napistu.constants import (
14
+ SBML_DFS,
15
+ SBOTERM_NAMES,
16
+ BQB_DEFINING_ATTRS,
17
+ BQB_DEFINING_ATTRS_LOOSE,
18
+ BQB,
19
+ IDENTIFIERS,
20
+ )
14
21
  from napistu.sbml_dfs_core import SBML_dfs
15
22
 
16
23
 
@@ -493,3 +500,84 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
493
500
 
494
501
  # Validate the model is still valid
495
502
  sbml_dfs_w_data.validate()
503
+
504
+
505
+ def test_filter_to_characteristic_species_ids():
506
+
507
+ species_ids_dict = {
508
+ SBML_DFS.S_ID: ["large_complex"] * 6
509
+ + ["small_complex"] * 2
510
+ + ["proteinA", "proteinB"]
511
+ + ["proteinC"] * 3
512
+ + [
513
+ "promiscuous_complexA",
514
+ "promiscuous_complexB",
515
+ "promiscuous_complexC",
516
+ "promiscuous_complexD",
517
+ "promiscuous_complexE",
518
+ ],
519
+ IDENTIFIERS.ONTOLOGY: ["complexportal"]
520
+ + ["HGNC"] * 7
521
+ + ["GO"] * 2
522
+ + ["ENSG", "ENSP", "pubmed"]
523
+ + ["HGNC"] * 5,
524
+ IDENTIFIERS.IDENTIFIER: [
525
+ "CPX-BIG",
526
+ "mem1",
527
+ "mem2",
528
+ "mem3",
529
+ "mem4",
530
+ "mem5",
531
+ "part1",
532
+ "part2",
533
+ "GO:1",
534
+ "GO:2",
535
+ "dna_seq",
536
+ "protein_seq",
537
+ "my_cool_pub",
538
+ ]
539
+ + ["promiscuous_complex"] * 5,
540
+ IDENTIFIERS.BQB: [BQB.IS]
541
+ + [BQB.HAS_PART] * 7
542
+ + [BQB.IS] * 2
543
+ + [
544
+ # these are retained if BQB_DEFINING_ATTRS_LOOSE is used
545
+ BQB.ENCODES,
546
+ BQB.IS_ENCODED_BY,
547
+ # this should always be removed
548
+ BQB.IS_DESCRIBED_BY,
549
+ ]
550
+ + [BQB.HAS_PART] * 5,
551
+ }
552
+
553
+ species_ids = pd.DataFrame(species_ids_dict)
554
+
555
+ characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
556
+ species_ids,
557
+ defining_biological_qualifiers=BQB_DEFINING_ATTRS,
558
+ max_complex_size=4,
559
+ max_promiscuity=4,
560
+ )
561
+
562
+ EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
563
+ assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
564
+
565
+ characteristic_ids_loose = sbml_dfs_core.filter_to_characteristic_species_ids(
566
+ species_ids,
567
+ # include encodes and is_encoded_by as equivalent to is
568
+ defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
569
+ max_complex_size=4,
570
+ # expand promiscuity to default value
571
+ max_promiscuity=20,
572
+ )
573
+
574
+ EXPECTED_IDS = [
575
+ "CPX-BIG",
576
+ "GO:1",
577
+ "GO:2",
578
+ "dna_seq",
579
+ "protein_seq",
580
+ "part1",
581
+ "part2",
582
+ ] + ["promiscuous_complex"] * 5
583
+ assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
@@ -1,6 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import pandas as pd
4
+
3
5
  from napistu import sbml_dfs_utils
6
+ from napistu.constants import BQB, BQB_DEFINING_ATTRS, BQB_DEFINING_ATTRS_LOOSE
4
7
 
5
8
 
6
9
  def test_id_formatter():
@@ -14,9 +17,47 @@ def test_id_formatter():
14
17
  assert list(input_vals) == inv_ids
15
18
 
16
19
 
17
- ################################################
18
- # __main__
19
- ################################################
20
-
21
- if __name__ == "__main__":
22
- test_id_formatter()
20
+ def test_get_characteristic_species_ids():
21
+ """
22
+ Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
23
+ """
24
+ # Create mock species identifiers data
25
+ mock_species_ids = pd.DataFrame(
26
+ {
27
+ "s_id": ["s1", "s2", "s3", "s4", "s5"],
28
+ "identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
29
+ "ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
30
+ "bqb": [
31
+ "BQB_IS",
32
+ "BQB_IS",
33
+ "BQB_HAS_PART",
34
+ "BQB_HAS_VERSION",
35
+ "BQB_ENCODES",
36
+ ],
37
+ }
38
+ )
39
+
40
+ # Create mock SBML_dfs object
41
+ class MockSBML_dfs:
42
+ def get_identifiers(self, entity_type):
43
+ return mock_species_ids
44
+
45
+ mock_sbml = MockSBML_dfs()
46
+
47
+ # Test dogmatic case (default)
48
+ expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART] # noqa: F841
49
+ dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
50
+ expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
51
+
52
+ pd.testing.assert_frame_equal(dogmatic_result, expected_dogmatic, check_like=True)
53
+
54
+ # Test non-dogmatic case
55
+ expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART] # noqa: F841
56
+ non_dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(
57
+ mock_sbml, dogmatic=False
58
+ )
59
+ expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
60
+
61
+ pd.testing.assert_frame_equal(
62
+ non_dogmatic_result, expected_non_dogmatic, check_like=True
63
+ )