napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. napistu/__main__.py +28 -13
  2. napistu/consensus.py +19 -25
  3. napistu/constants.py +102 -83
  4. napistu/indices.py +3 -1
  5. napistu/ingestion/napistu_edgelist.py +4 -4
  6. napistu/ingestion/sbml.py +298 -295
  7. napistu/ingestion/string.py +14 -18
  8. napistu/ingestion/trrust.py +22 -27
  9. napistu/matching/interactions.py +41 -39
  10. napistu/matching/species.py +1 -1
  11. napistu/modify/gaps.py +2 -1
  12. napistu/network/constants.py +61 -45
  13. napistu/network/data_handling.py +1 -1
  14. napistu/network/neighborhoods.py +3 -3
  15. napistu/network/net_create.py +440 -616
  16. napistu/network/net_create_utils.py +734 -0
  17. napistu/network/net_propagation.py +1 -1
  18. napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
  19. napistu/network/ng_utils.py +28 -21
  20. napistu/network/paths.py +4 -4
  21. napistu/network/precompute.py +35 -74
  22. napistu/ontologies/genodexito.py +5 -1
  23. napistu/ontologies/renaming.py +4 -0
  24. napistu/sbml_dfs_core.py +127 -64
  25. napistu/sbml_dfs_utils.py +50 -0
  26. napistu/utils.py +132 -46
  27. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
  28. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
  29. tests/conftest.py +171 -13
  30. tests/test_consensus.py +74 -5
  31. tests/test_gaps.py +26 -15
  32. tests/test_network_data_handling.py +5 -2
  33. tests/test_network_net_create.py +93 -202
  34. tests/test_network_net_create_utils.py +538 -0
  35. tests/test_network_ng_core.py +19 -0
  36. tests/test_network_ng_utils.py +1 -1
  37. tests/test_network_precompute.py +5 -4
  38. tests/test_ontologies_renaming.py +28 -24
  39. tests/test_rpy2_callr.py +0 -1
  40. tests/test_rpy2_init.py +0 -1
  41. tests/test_sbml_dfs_core.py +165 -15
  42. tests/test_sbml_dfs_utils.py +45 -0
  43. tests/test_utils.py +45 -2
  44. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
  45. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
  46. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
  47. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ import pandas as pd
5
5
  import numpy as np
6
6
  import igraph as ig
7
7
 
8
- from napistu.network.napistu_graph_core import NapistuGraph
8
+ from napistu.network.ng_core import NapistuGraph
9
9
 
10
10
 
11
11
  def personalized_pagerank_by_attribute(
@@ -36,7 +36,7 @@ class NapistuGraph(ig.Graph):
36
36
  ----------
37
37
  is_reversed : bool
38
38
  Whether the graph edges have been reversed from their original direction
39
- graph_type : str or None
39
+ wiring_approach : str or None
40
40
  Type of graph (e.g., 'bipartite', 'regulatory', 'surrogate')
41
41
  weighting_strategy : str or None
42
42
  Strategy used for edge weighting (e.g., 'topology', 'mixed', 'calibrated')
@@ -101,7 +101,7 @@ class NapistuGraph(ig.Graph):
101
101
  # Initialize metadata
102
102
  self._metadata = {
103
103
  "is_reversed": False,
104
- "graph_type": None,
104
+ "wiring_approach": None,
105
105
  "weighting_strategy": None,
106
106
  "creation_params": {},
107
107
  }
@@ -147,6 +147,21 @@ class NapistuGraph(ig.Graph):
147
147
 
148
148
  return new_graph
149
149
 
150
+ @property
151
+ def is_reversed(self) -> bool:
152
+ """Check if the graph has been reversed."""
153
+ return self._metadata["is_reversed"]
154
+
155
+ @property
156
+ def wiring_approach(self) -> Optional[str]:
157
+ """Get the graph type (bipartite, regulatory, etc.)."""
158
+ return self._metadata["wiring_approach"]
159
+
160
+ @property
161
+ def weighting_strategy(self) -> Optional[str]:
162
+ """Get the weighting strategy used."""
163
+ return self._metadata["weighting_strategy"]
164
+
150
165
  def reverse_edges(self) -> None:
151
166
  """
152
167
  Reverse all edges in the graph.
@@ -181,20 +196,47 @@ class NapistuGraph(ig.Graph):
181
196
 
182
197
  return None
183
198
 
184
- @property
185
- def is_reversed(self) -> bool:
186
- """Check if the graph has been reversed."""
187
- return self._metadata["is_reversed"]
199
+ def remove_isolated_vertices(self):
200
+ """
201
+ Remove vertices that have no edges (degree 0) from the graph.
188
202
 
189
- @property
190
- def graph_type(self) -> Optional[str]:
191
- """Get the graph type (bipartite, regulatory, etc.)."""
192
- return self._metadata["graph_type"]
193
203
 
194
- @property
195
- def weighting_strategy(self) -> Optional[str]:
196
- """Get the weighting strategy used."""
197
- return self._metadata["weighting_strategy"]
204
+ Returns
205
+ -------
206
+ None
207
+ The graph is modified in-place.
208
+
209
+ """
210
+
211
+ # Find isolated vertices (degree 0)
212
+ isolated_vertices = self.vs.select(_degree=0)
213
+
214
+ if len(isolated_vertices) == 0:
215
+ logger.info("No isolated vertices found to remove")
216
+ return
217
+
218
+ # Get vertex names/indices for logging (up to 5 examples)
219
+ vertex_names = []
220
+ for v in isolated_vertices[:5]:
221
+ # Use vertex name if available, otherwise use index
222
+ name = (
223
+ v["name"]
224
+ if "name" in v.attributes() and v["name"] is not None
225
+ else str(v.index)
226
+ )
227
+ vertex_names.append(name)
228
+
229
+ # Create log message
230
+ examples_str = ", ".join(f"'{name}'" for name in vertex_names)
231
+ if len(isolated_vertices) > 5:
232
+ examples_str += f" (and {len(isolated_vertices) - 5} more)"
233
+
234
+ logger.info(
235
+ f"Removed {len(isolated_vertices)} isolated vertices: [{examples_str}]"
236
+ )
237
+
238
+ # Remove the isolated vertices
239
+ self.delete_vertices(isolated_vertices)
198
240
 
199
241
  def set_metadata(self, **kwargs) -> None:
200
242
  """
@@ -252,7 +294,7 @@ class NapistuGraph(ig.Graph):
252
294
  base_str = super().__str__()
253
295
  metadata_str = (
254
296
  f"Reversed: {self.is_reversed}, "
255
- f"Type: {self.graph_type}, "
297
+ f"Type: {self.wiring_approach}, "
256
298
  f"Weighting: {self.weighting_strategy}"
257
299
  )
258
300
  return f"{base_str}\nNapistuGraph metadata: {metadata_str}"
@@ -17,12 +17,12 @@ import pandas as pd
17
17
  from napistu import sbml_dfs_core
18
18
  from napistu import source
19
19
  from napistu.network import net_create
20
- from napistu.network.napistu_graph_core import NapistuGraph
20
+ from napistu.network.ng_core import NapistuGraph
21
21
 
22
22
  from napistu.constants import SBML_DFS
23
23
  from napistu.constants import SOURCE_SPEC
24
24
  from napistu.identifiers import _validate_assets_sbml_ids
25
- from napistu.network.constants import NAPISTU_GRAPH_TYPES
25
+ from napistu.network.constants import GRAPH_WIRING_APPROACHES
26
26
  from napistu.network.constants import NAPISTU_GRAPH_DIRECTEDNESS
27
27
 
28
28
  logger = logging.getLogger(__name__)
@@ -138,9 +138,9 @@ def export_networks(
138
138
  model_prefix: str,
139
139
  outdir: str,
140
140
  directeds: list[bool] = [True, False],
141
- graph_types: list[str] = [
142
- NAPISTU_GRAPH_TYPES.BIPARTITE,
143
- NAPISTU_GRAPH_TYPES.REGULATORY,
141
+ wiring_approaches: list[str] = [
142
+ GRAPH_WIRING_APPROACHES.BIPARTITE,
143
+ GRAPH_WIRING_APPROACHES.REGULATORY,
144
144
  ],
145
145
  ) -> None:
146
146
  """
@@ -158,10 +158,11 @@ def export_networks(
158
158
  Path to an existing directory where results should be saved
159
159
  directeds : [bool]
160
160
  List of directed types to export: a directed (True) or undirected graph be made (False)
161
- graph_types : [str]
161
+ wiring_approaches : [str]
162
162
  Types of graphs to construct, valid values are:
163
163
  - bipartite: substrates and modifiers point to the reaction they drive, this reaction points to products
164
164
  - regulatory: non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
165
+ - surrogate: regulatory approach but with substrates upstream of enzymes
165
166
 
166
167
  Returns:
167
168
  ----------
@@ -177,24 +178,26 @@ def export_networks(
177
178
  raise FileNotFoundError(f"{outdir} does not exist")
178
179
  if not isinstance(directeds, list):
179
180
  raise TypeError(f"directeds must be a list, but was {type(directeds)}")
180
- if not isinstance(graph_types, list):
181
- raise TypeError(f"graph_types must be a list but was a {type(graph_types)}")
181
+ if not isinstance(wiring_approaches, list):
182
+ raise TypeError(
183
+ f"wiring_approaches must be a list but was a {type(wiring_approaches)}"
184
+ )
182
185
 
183
- # iterate through provided graph_types and export each type
184
- for graph_type in graph_types:
186
+ # iterate through provided wiring_approaches and export each type
187
+ for wiring_approach in wiring_approaches:
185
188
  for directed in directeds:
186
189
  export_pkl_path = _create_network_save_string(
187
190
  model_prefix=model_prefix,
188
191
  outdir=outdir,
189
192
  directed=directed,
190
- graph_type=graph_type,
193
+ wiring_approach=wiring_approach,
191
194
  )
192
- print(f"Exporting {graph_type} network to {export_pkl_path}")
195
+ print(f"Exporting {wiring_approach} network to {export_pkl_path}")
193
196
 
194
197
  network_graph = net_create.process_napistu_graph(
195
198
  sbml_dfs=sbml_dfs,
196
199
  directed=directed,
197
- graph_type=graph_type,
200
+ wiring_approach=wiring_approach,
198
201
  verbose=True,
199
202
  )
200
203
 
@@ -206,7 +209,7 @@ def export_networks(
206
209
  def read_network_pkl(
207
210
  model_prefix: str,
208
211
  network_dir: str,
209
- graph_type: str,
212
+ wiring_approach: str,
210
213
  directed: bool = True,
211
214
  ) -> NapistuGraph:
212
215
  """
@@ -222,10 +225,11 @@ def read_network_pkl(
222
225
  Path to a directory containing all saved networks.
223
226
  directed : bool
224
227
  Should a directed (True) or undirected graph be loaded (False)
225
- graph_type : [str]
228
+ wiring_approach : [str]
226
229
  Type of graphs to read, valid values are:
227
230
  - bipartite: substrates and modifiers point to the reaction they drive, this reaction points to products
228
231
  - regulatory: non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
232
+ - surrogate: regulatory approach but with substrates upstream of enzymes
229
233
 
230
234
  Returns
231
235
  -------
@@ -239,15 +243,17 @@ def read_network_pkl(
239
243
  raise FileNotFoundError(f"{network_dir} does not exist")
240
244
  if not isinstance(directed, bool):
241
245
  raise TypeError(f"directed must be a bool, but was {type(directed)}")
242
- if not isinstance(graph_type, str):
243
- raise TypeError(f"graph_type must be a str but was a {type(graph_type)}")
246
+ if not isinstance(wiring_approach, str):
247
+ raise TypeError(
248
+ f"wiring_approach must be a str but was a {type(wiring_approach)}"
249
+ )
244
250
 
245
251
  import_pkl_path = _create_network_save_string(
246
- model_prefix, network_dir, directed, graph_type
252
+ model_prefix, network_dir, directed, wiring_approach
247
253
  )
248
254
  if not os.path.isfile(import_pkl_path):
249
255
  raise FileNotFoundError(f"{import_pkl_path} does not exist")
250
- print(f"Importing {graph_type} network from {import_pkl_path}")
256
+ print(f"Importing {wiring_approach} network from {import_pkl_path}")
251
257
 
252
258
  network_graph = ig.Graph.Read_Pickle(fname=import_pkl_path)
253
259
 
@@ -374,7 +380,7 @@ def read_graph_attrs_spec(graph_attrs_spec_uri: str) -> dict:
374
380
 
375
381
  # Internal utility functions
376
382
  def _create_network_save_string(
377
- model_prefix: str, outdir: str, directed: bool, graph_type: str
383
+ model_prefix: str, outdir: str, directed: bool, wiring_approach: str
378
384
  ) -> str:
379
385
  if directed:
380
386
  directed_str = NAPISTU_GRAPH_DIRECTEDNESS.DIRECTED
@@ -382,7 +388,8 @@ def _create_network_save_string(
382
388
  directed_str = NAPISTU_GRAPH_DIRECTEDNESS.UNDIRECTED
383
389
 
384
390
  export_pkl_path = os.path.join(
385
- outdir, model_prefix + "_network_" + graph_type + "_" + directed_str + ".pkl"
391
+ outdir,
392
+ model_prefix + "_network_" + wiring_approach + "_" + directed_str + ".pkl",
386
393
  )
387
394
 
388
395
  return export_pkl_path
napistu/network/paths.py CHANGED
@@ -9,9 +9,9 @@ import pandas as pd
9
9
 
10
10
  from napistu import sbml_dfs_core
11
11
  from napistu import utils
12
- from napistu.network.napistu_graph_core import NapistuGraph
12
+ from napistu.network.ng_core import NapistuGraph
13
13
  from napistu.network.ng_utils import get_minimal_sources_edges
14
- from napistu.constants import CPR_PATH_REQ_VARS
14
+ from napistu.constants import NAPISTU_PATH_REQ_VARS
15
15
  from napistu.constants import MINI_SBO_NAME_TO_POLARITY
16
16
  from napistu.constants import MINI_SBO_TO_NAME
17
17
  from napistu.constants import SBML_DFS
@@ -391,7 +391,7 @@ def _filter_paths_by_precomputed_distances(
391
391
  ) -> pd.DataFrame:
392
392
  """Filter source -> destination pairs based on precomputed distances if they were provided."""
393
393
 
394
- utils.match_pd_vars(all_species_pairs, CPR_PATH_REQ_VARS).assert_present()
394
+ utils.match_pd_vars(all_species_pairs, NAPISTU_PATH_REQ_VARS).assert_present()
395
395
 
396
396
  if precomputed_distances is None:
397
397
  logger.info(
@@ -402,7 +402,7 @@ def _filter_paths_by_precomputed_distances(
402
402
  if not isinstance(precomputed_distances, pd.DataFrame):
403
403
  raise TypeError('"precomputed_distances" must be a pd.DataFrame')
404
404
 
405
- utils.match_pd_vars(precomputed_distances, CPR_PATH_REQ_VARS).assert_present()
405
+ utils.match_pd_vars(precomputed_distances, NAPISTU_PATH_REQ_VARS).assert_present()
406
406
 
407
407
  # filter to pairs which are connected in the pre-computed distances table
408
408
  valid_all_species_pairs = all_species_pairs.merge(
@@ -2,17 +2,16 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import math
5
- from pathlib import Path
6
- from typing import Union
7
- import io
8
5
 
9
6
  import numpy as np
10
7
  import pandas as pd
11
- from fs.errors import ResourceNotFound
12
8
 
13
- from napistu.network.napistu_graph_core import NapistuGraph
9
+ from napistu.network.ng_core import NapistuGraph
14
10
  from napistu.network.ig_utils import validate_edge_attributes
15
- from napistu.utils import load_json, save_json
11
+ from napistu.constants import NAPISTU_EDGELIST, SBML_DFS
12
+ from napistu.network.constants import (
13
+ NAPISTU_GRAPH_EDGES,
14
+ )
16
15
 
17
16
  logger = logging.getLogger(__name__)
18
17
 
@@ -22,10 +21,13 @@ def precompute_distances(
22
21
  max_steps: int = -1,
23
22
  max_score_q: float = float(1),
24
23
  partition_size: int = int(5000),
25
- weights_vars: list[str] = ["weights", "upstream_weights"],
24
+ weights_vars: list[str] = [
25
+ NAPISTU_GRAPH_EDGES.WEIGHTS,
26
+ NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
27
+ ],
26
28
  ) -> pd.DataFrame:
27
29
  """
28
- Pre-Compute Distances
30
+ Precompute Distances between all pairs of species in a NapistuGraph network.
29
31
 
30
32
  Parameters
31
33
  ----------
@@ -80,6 +82,7 @@ def precompute_distances(
80
82
  # iterate through all partitions of "from" nodes and find their shortest and lowest weighted paths
81
83
  unique_partitions = vs_to_partition.index.unique().tolist()
82
84
 
85
+ logger.info(f"Calculating distances for {len(unique_partitions)} partitions")
83
86
  precomputed_distances = pd.concat(
84
87
  [
85
88
  _calculate_distances_subset(
@@ -93,6 +96,10 @@ def precompute_distances(
93
96
  ).query("sc_id_origin != sc_id_dest")
94
97
 
95
98
  # filter by path length and/or weight
99
+
100
+ logger.info(
101
+ f"Filtering distances by path length ({max_steps}) and score quantile ({max_score_q})"
102
+ )
96
103
  filtered_precomputed_distances = _filter_precomputed_distances(
97
104
  precomputed_distances=precomputed_distances,
98
105
  max_steps=max_steps,
@@ -103,65 +110,14 @@ def precompute_distances(
103
110
  return filtered_precomputed_distances
104
111
 
105
112
 
106
- def save_precomputed_distances(
107
- precomputed_distances: pd.DataFrame, uri: Union[str, Path]
108
- ) -> None:
109
- """
110
- Save a precomputed distances DataFrame to a JSON file.
111
-
112
- Parameters
113
- ----------
114
- precomputed_distances : pd.DataFrame
115
- The precomputed distances DataFrame to save
116
- uri : Union[str, Path]
117
- Path where to save the JSON file. Can be a local path or a GCS URI.
118
-
119
- Raises
120
- ------
121
- OSError
122
- If the file cannot be written to (permission issues, etc.)
123
- """
124
- save_json(str(uri), precomputed_distances.to_json())
125
-
126
-
127
- def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
128
- """
129
- Load a precomputed distances DataFrame from a JSON file.
130
-
131
- Parameters
132
- ----------
133
- uri : Union[str, Path]
134
- Path to the JSON file to load
135
-
136
- Returns
137
- -------
138
- pd.DataFrame
139
- The reconstructed precomputed distances DataFrame
140
-
141
- Raises
142
- ------
143
- FileNotFoundError
144
- If the specified file does not exist
145
- """
146
- try:
147
- json_string = load_json(str(uri))
148
- df = pd.read_json(io.StringIO(json_string))
149
-
150
- # Convert integer columns to float
151
- for col in df.columns:
152
- if df[col].dtype in ["int64", "int32", "int16", "int8"]:
153
- df[col] = df[col].astype(float)
154
-
155
- return df
156
- except ResourceNotFound as e:
157
- raise FileNotFoundError(f"File not found: {uri}") from e
158
-
159
-
160
113
  def _calculate_distances_subset(
161
114
  napistu_graph: NapistuGraph,
162
115
  vs_to_partition: pd.DataFrame,
163
116
  one_partition: pd.DataFrame,
164
- weights_vars: list[str] = ["weights", "upstream_weights"],
117
+ weights_vars: list[str] = [
118
+ NAPISTU_GRAPH_EDGES.WEIGHTS,
119
+ NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
120
+ ],
165
121
  ) -> pd.DataFrame:
166
122
  """Calculate distances from a subset of vertices to all vertices."""
167
123
 
@@ -169,14 +125,15 @@ def _calculate_distances_subset(
169
125
  pd.DataFrame(
170
126
  np.array(
171
127
  napistu_graph.distances(
172
- source=one_partition["sc_id"], target=vs_to_partition["sc_id"]
128
+ source=one_partition[SBML_DFS.SC_ID],
129
+ target=vs_to_partition[SBML_DFS.SC_ID],
173
130
  )
174
131
  ),
175
- index=one_partition["sc_id"].rename("sc_id_origin"),
176
- columns=vs_to_partition["sc_id"].rename("sc_id_dest"),
132
+ index=one_partition[SBML_DFS.SC_ID].rename(NAPISTU_EDGELIST.SC_ID_ORIGIN),
133
+ columns=vs_to_partition[SBML_DFS.SC_ID].rename(NAPISTU_EDGELIST.SC_ID_DEST),
177
134
  )
178
135
  .reset_index()
179
- .melt("sc_id_origin", value_name="path_length")
136
+ .melt(NAPISTU_EDGELIST.SC_ID_ORIGIN, value_name="path_length")
180
137
  .replace([np.inf, -np.inf], np.nan, inplace=False)
181
138
  .dropna()
182
139
  )
@@ -187,16 +144,20 @@ def _calculate_distances_subset(
187
144
  pd.DataFrame(
188
145
  np.array(
189
146
  napistu_graph.distances(
190
- source=one_partition["sc_id"],
191
- target=vs_to_partition["sc_id"],
147
+ source=one_partition[SBML_DFS.SC_ID],
148
+ target=vs_to_partition[SBML_DFS.SC_ID],
192
149
  weights=weight_type,
193
150
  )
194
151
  ),
195
- index=one_partition["sc_id"].rename("sc_id_origin"),
196
- columns=vs_to_partition["sc_id"].rename("sc_id_dest"),
152
+ index=one_partition[SBML_DFS.SC_ID].rename(
153
+ NAPISTU_EDGELIST.SC_ID_ORIGIN
154
+ ),
155
+ columns=vs_to_partition[SBML_DFS.SC_ID].rename(
156
+ NAPISTU_EDGELIST.SC_ID_DEST
157
+ ),
197
158
  )
198
159
  .reset_index()
199
- .melt("sc_id_origin", value_name=f"path_{weight_type}")
160
+ .melt(NAPISTU_EDGELIST.SC_ID_ORIGIN, value_name=f"path_{weight_type}")
200
161
  .replace([np.inf, -np.inf], np.nan, inplace=False)
201
162
  .dropna()
202
163
  )
@@ -211,8 +172,8 @@ def _calculate_distances_subset(
211
172
  # note: these may be different paths! e.g., a longer path may have a lower weight than a short one
212
173
  path_summaries = d_steps.merge(
213
174
  d_weights,
214
- left_on=["sc_id_origin", "sc_id_dest"],
215
- right_on=["sc_id_origin", "sc_id_dest"],
175
+ left_on=[NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST],
176
+ right_on=[NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST],
216
177
  )
217
178
 
218
179
  # return connected species
@@ -356,7 +356,7 @@ class Genodexito:
356
356
  )
357
357
  logger.debug(
358
358
  f"{ids.shape[0] - expanded_ids.shape[0]} "
359
- "ids are not included in expanded ids"
359
+ "ids are not included in expanded ids. These will be filled with empty Identifiers"
360
360
  )
361
361
  else:
362
362
  matched_expanded_ids = expanded_ids
@@ -364,6 +364,10 @@ class Genodexito:
364
364
  updated_ids = ids.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
365
365
  pd.DataFrame(matched_expanded_ids)
366
366
  )
367
+ # fill missing attributes with empty Identifiers
368
+ updated_ids[SBML_DFS.S_IDENTIFIERS] = updated_ids[
369
+ SBML_DFS.S_IDENTIFIERS
370
+ ].fillna(identifiers.Identifiers([]))
367
371
 
368
372
  setattr(sbml_dfs, "species", updated_ids)
369
373
 
@@ -72,6 +72,10 @@ def rename_species_ontologies(
72
72
  updated_species = sbml_dfs.species.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
73
73
  pd.DataFrame(species_identifiers)
74
74
  )
75
+ # fill missing attributes with empty Identifiers
76
+ updated_species[SBML_DFS.S_IDENTIFIERS] = updated_species[
77
+ SBML_DFS.S_IDENTIFIERS
78
+ ].fillna(identifiers.Identifiers([]))
75
79
 
76
80
  setattr(sbml_dfs, "species", updated_species)
77
81