napistu 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. napistu/__main__.py +8 -4
  2. napistu/constants.py +30 -35
  3. napistu/gcs/constants.py +11 -11
  4. napistu/ingestion/napistu_edgelist.py +4 -4
  5. napistu/matching/interactions.py +41 -39
  6. napistu/modify/gaps.py +2 -1
  7. napistu/network/constants.py +61 -45
  8. napistu/network/data_handling.py +1 -1
  9. napistu/network/neighborhoods.py +3 -3
  10. napistu/network/net_create.py +440 -616
  11. napistu/network/net_create_utils.py +734 -0
  12. napistu/network/net_propagation.py +1 -1
  13. napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
  14. napistu/network/ng_utils.py +28 -21
  15. napistu/network/paths.py +4 -4
  16. napistu/network/precompute.py +35 -74
  17. napistu/ontologies/id_tables.py +282 -0
  18. napistu/sbml_dfs_core.py +53 -63
  19. napistu/sbml_dfs_utils.py +126 -16
  20. napistu/utils.py +80 -5
  21. {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/METADATA +7 -2
  22. {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/RECORD +39 -34
  23. tests/conftest.py +102 -1
  24. tests/test_network_data_handling.py +5 -2
  25. tests/test_network_net_create.py +92 -201
  26. tests/test_network_net_create_utils.py +538 -0
  27. tests/test_network_ng_core.py +19 -0
  28. tests/test_network_ng_utils.py +1 -1
  29. tests/test_network_precompute.py +4 -3
  30. tests/test_ontologies_id_tables.py +198 -0
  31. tests/test_rpy2_callr.py +0 -1
  32. tests/test_rpy2_init.py +0 -1
  33. tests/test_sbml_dfs_core.py +30 -19
  34. tests/test_sbml_dfs_utils.py +115 -0
  35. tests/test_utils.py +26 -2
  36. {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/WHEEL +0 -0
  37. {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/entry_points.txt +0 -0
  38. {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/licenses/LICENSE +0 -0
  39. {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,198 @@
1
+ import pytest
2
+ import pandas as pd
3
+ from unittest.mock import patch
4
+
5
+ from napistu.ontologies import id_tables
6
+ from napistu.constants import (
7
+ BQB,
8
+ IDENTIFIERS,
9
+ ONTOLOGIES,
10
+ SBML_DFS,
11
+ VALID_BQB_TERMS,
12
+ )
13
+
14
+
15
+ @pytest.fixture
16
+ def sample_id_table():
17
+ """Create a sample DataFrame for testing."""
18
+ return pd.DataFrame(
19
+ {
20
+ SBML_DFS.S_ID: ["s1", "s2", "s3", "s4"],
21
+ IDENTIFIERS.ONTOLOGY: [
22
+ ONTOLOGIES.GO,
23
+ ONTOLOGIES.KEGG,
24
+ ONTOLOGIES.REACTOME,
25
+ ONTOLOGIES.WIKIPATHWAYS,
26
+ ],
27
+ IDENTIFIERS.IDENTIFIER: ["GO:0001", "hsa00010", "R-HSA-123", "WP123"],
28
+ IDENTIFIERS.BQB: [BQB.IS, BQB.HAS_PART, BQB.IS_PART_OF, BQB.IS_VERSION_OF],
29
+ IDENTIFIERS.URL: ["foo", "bar", "baz", "qux"],
30
+ "other_col": ["a", "b", "c", "d"],
31
+ }
32
+ )
33
+
34
+
35
+ def test_sanitize_id_table_values_valid_cases(sample_id_table):
36
+ """Test all valid use cases for _sanitize_id_table_values function."""
37
+
38
+ # Test string input conversion
39
+ result = id_tables._sanitize_id_table_values(
40
+ ONTOLOGIES.GO, sample_id_table, IDENTIFIERS.ONTOLOGY
41
+ )
42
+ assert result == {ONTOLOGIES.GO}
43
+ assert isinstance(result, set)
44
+
45
+ # Test list input conversion
46
+ result = id_tables._sanitize_id_table_values(
47
+ [ONTOLOGIES.GO, ONTOLOGIES.KEGG], sample_id_table, IDENTIFIERS.ONTOLOGY
48
+ )
49
+ assert result == {ONTOLOGIES.GO, ONTOLOGIES.KEGG}
50
+ assert isinstance(result, set)
51
+
52
+ # Test set input unchanged
53
+ input_set = {ONTOLOGIES.GO, ONTOLOGIES.KEGG}
54
+ result = id_tables._sanitize_id_table_values(
55
+ input_set, sample_id_table, IDENTIFIERS.ONTOLOGY
56
+ )
57
+ assert result == input_set
58
+ assert isinstance(result, set)
59
+
60
+ # Test successful validation against valid_values
61
+ result = id_tables._sanitize_id_table_values(
62
+ BQB.IS, sample_id_table, IDENTIFIERS.BQB, set(VALID_BQB_TERMS)
63
+ )
64
+ assert result == {BQB.IS}
65
+
66
+ # Test duplicate values in input list are handled correctly
67
+ result = id_tables._sanitize_id_table_values(
68
+ [ONTOLOGIES.GO, ONTOLOGIES.GO, ONTOLOGIES.KEGG],
69
+ sample_id_table,
70
+ IDENTIFIERS.ONTOLOGY,
71
+ )
72
+ assert result == {
73
+ ONTOLOGIES.GO,
74
+ ONTOLOGIES.KEGG,
75
+ } # Duplicates removed by set conversion
76
+
77
+ # Test all values present in table
78
+ result = id_tables._sanitize_id_table_values(
79
+ [ONTOLOGIES.GO, ONTOLOGIES.KEGG, ONTOLOGIES.REACTOME],
80
+ sample_id_table,
81
+ IDENTIFIERS.ONTOLOGY,
82
+ )
83
+ assert result == {ONTOLOGIES.GO, ONTOLOGIES.KEGG, ONTOLOGIES.REACTOME}
84
+
85
+ # Test single value present in table
86
+ result = id_tables._sanitize_id_table_values(
87
+ ONTOLOGIES.WIKIPATHWAYS, sample_id_table, IDENTIFIERS.ONTOLOGY
88
+ )
89
+ assert result == {ONTOLOGIES.WIKIPATHWAYS}
90
+
91
+ # Test with different column (BQB)
92
+ result = id_tables._sanitize_id_table_values(
93
+ BQB.HAS_PART, sample_id_table, IDENTIFIERS.BQB
94
+ )
95
+ assert result == {BQB.HAS_PART}
96
+
97
+
98
+ @patch("napistu.ontologies.id_tables.logger")
99
+ def test_sanitize_id_table_values_error_cases(mock_logger, sample_id_table):
100
+ """Test error cases and edge cases for _sanitize_id_table_values function."""
101
+
102
+ # Test invalid input types raise ValueError
103
+ with pytest.raises(ValueError, match="ontology must be a string, a set, or list"):
104
+ id_tables._sanitize_id_table_values(123, sample_id_table, IDENTIFIERS.ONTOLOGY)
105
+
106
+ with pytest.raises(ValueError, match="ontology must be a string, a set, or list"):
107
+ id_tables._sanitize_id_table_values(
108
+ {"key": "value"}, sample_id_table, IDENTIFIERS.ONTOLOGY
109
+ )
110
+
111
+ # Test validation failure against valid_values
112
+ with pytest.raises(
113
+ ValueError, match="The following bqb are not valid: INVALID_BQB"
114
+ ):
115
+ id_tables._sanitize_id_table_values(
116
+ "INVALID_BQB", sample_id_table, IDENTIFIERS.BQB, set(VALID_BQB_TERMS), "bqb"
117
+ )
118
+
119
+ # Test multiple invalid values against valid_values
120
+ with pytest.raises(ValueError, match="The following bqb are not valid"):
121
+ id_tables._sanitize_id_table_values(
122
+ ["INVALID1", "INVALID2"],
123
+ sample_id_table,
124
+ IDENTIFIERS.BQB,
125
+ set(VALID_BQB_TERMS),
126
+ "bqb",
127
+ )
128
+
129
+ # Test all values missing from table raises error
130
+ missing_values = {"MISSING1", "MISSING2"}
131
+ with pytest.raises(ValueError, match="None of the requested ontology are present"):
132
+ id_tables._sanitize_id_table_values(
133
+ missing_values, sample_id_table, IDENTIFIERS.ONTOLOGY
134
+ )
135
+
136
+ # Test a single string value that is absent from the table
137
+ with pytest.raises(ValueError, match="None of the requested ontology are present"):
138
+ id_tables._sanitize_id_table_values(
139
+ "INVALID_ONTOLOGY", sample_id_table, IDENTIFIERS.ONTOLOGY
140
+ )
141
+
142
+ # Test custom value_type_name in error messages
143
+ with pytest.raises(ValueError, match="custom_type must be a string"):
144
+ id_tables._sanitize_id_table_values(
145
+ 123, sample_id_table, IDENTIFIERS.ONTOLOGY, value_type_name="custom_type"
146
+ )
147
+
148
+ # Test default value_type_name uses column_name
149
+ with pytest.raises(ValueError, match="test_column must be a string"):
150
+ id_tables._sanitize_id_table_values(123, sample_id_table, "test_column")
151
+
152
+ # Test empty dataframe column
153
+ empty_df = pd.DataFrame({"ontology": []})
154
+ with pytest.raises(ValueError, match="None of the requested ontology are present"):
155
+ id_tables._sanitize_id_table_values("GO", empty_df, IDENTIFIERS.ONTOLOGY)
156
+
157
+ # Test partial values missing logs warning but doesn't raise error
158
+ mixed_values = {ONTOLOGIES.GO, "MISSING"} # GO exists, MISSING doesn't
159
+ result = id_tables._sanitize_id_table_values(
160
+ mixed_values, sample_id_table, IDENTIFIERS.ONTOLOGY
161
+ )
162
+
163
+ assert result == mixed_values
164
+ mock_logger.warning.assert_called_once()
165
+ warning_call = mock_logger.warning.call_args[0][0]
166
+ assert "MISSING" in warning_call
167
+ assert "not present in the id_table" in warning_call
168
+
169
+ # Test multiple partial missing values
170
+ mock_logger.reset_mock()
171
+ mixed_values = {ONTOLOGIES.GO, ONTOLOGIES.KEGG, "MISSING1", "MISSING2"}
172
+ result = id_tables._sanitize_id_table_values(
173
+ mixed_values, sample_id_table, IDENTIFIERS.ONTOLOGY
174
+ )
175
+
176
+ assert result == mixed_values
177
+ mock_logger.warning.assert_called_once()
178
+ warning_call = mock_logger.warning.call_args[0][0]
179
+ assert "MISSING1" in warning_call and "MISSING2" in warning_call
180
+
181
+
182
+ def test_filter_id_table_basic(sample_id_table):
183
+ """Basic test for filter_id_table filtering by identifier, ontology, and bqb."""
184
+
185
+ # Use a known identifier, ontology, and bqb from the fixture
186
+ filtered = id_tables.filter_id_table(
187
+ id_table=sample_id_table,
188
+ identifiers=["GO:0001"],
189
+ ontologies=[ONTOLOGIES.GO],
190
+ bqbs=[BQB.IS],
191
+ )
192
+ # Should return a DataFrame with only the matching row
193
+ assert isinstance(filtered, pd.DataFrame)
194
+ assert len(filtered) == 1
195
+ row = filtered.iloc[0]
196
+ assert row[IDENTIFIERS.ONTOLOGY] == ONTOLOGIES.GO
197
+ assert row[IDENTIFIERS.IDENTIFIER] == "GO:0001"
198
+ assert row[IDENTIFIERS.BQB] == BQB.IS
tests/test_rpy2_callr.py CHANGED
@@ -16,7 +16,6 @@ sys.modules["rpy2.robjects.pandas2ri"] = Mock()
16
16
  sys.modules["rpy2.rinterface"] = Mock()
17
17
  sys.modules["rpy2_arrow"] = Mock()
18
18
  sys.modules["rpy2_arrow.arrow"] = Mock()
19
- sys.modules["pyarrow"] = Mock()
20
19
 
21
20
  import napistu.rpy2.callr # noqa: E402
22
21
 
tests/test_rpy2_init.py CHANGED
@@ -15,7 +15,6 @@ sys.modules["rpy2.robjects.pandas2ri"] = Mock()
15
15
  sys.modules["rpy2.rinterface"] = Mock()
16
16
  sys.modules["rpy2_arrow"] = Mock()
17
17
  sys.modules["rpy2_arrow.arrow"] = Mock()
18
- sys.modules["pyarrow"] = Mock()
19
18
 
20
19
  import napistu.rpy2 # noqa: E402
21
20
 
@@ -13,10 +13,12 @@ from napistu.modify import pathwayannot
13
13
 
14
14
  from napistu import identifiers as napistu_identifiers
15
15
  from napistu.constants import (
16
- SBML_DFS,
16
+ BQB,
17
17
  BQB_DEFINING_ATTRS,
18
18
  BQB_DEFINING_ATTRS_LOOSE,
19
- BQB,
19
+ SBML_DFS,
20
+ SCHEMA_DEFS,
21
+ ONTOLOGIES,
20
22
  )
21
23
  from napistu.sbml_dfs_core import SBML_dfs
22
24
  from unittest.mock import patch
@@ -291,53 +293,62 @@ def test_read_sbml_with_invalid_ids():
291
293
 
292
294
 
293
295
  def test_get_table(sbml_dfs):
294
- assert isinstance(sbml_dfs.get_table("species"), pd.DataFrame)
295
- assert isinstance(sbml_dfs.get_table("species", {"id"}), pd.DataFrame)
296
+ assert isinstance(sbml_dfs.get_table(SBML_DFS.SPECIES), pd.DataFrame)
297
+ assert isinstance(
298
+ sbml_dfs.get_table(SBML_DFS.SPECIES, {SCHEMA_DEFS.ID}), pd.DataFrame
299
+ )
296
300
 
297
301
  # invalid table
298
302
  with pytest.raises(ValueError):
299
- sbml_dfs.get_table("foo", {"id"})
303
+ sbml_dfs.get_table("foo", {SCHEMA_DEFS.ID})
300
304
 
301
305
  # bad type
302
306
  with pytest.raises(TypeError):
303
- sbml_dfs.get_table("reaction_species", "id")
307
+ sbml_dfs.get_table(SBML_DFS.REACTION_SPECIES, SCHEMA_DEFS.ID)
304
308
 
305
309
  # reaction species don't have ids
306
310
  with pytest.raises(ValueError):
307
- sbml_dfs.get_table("reaction_species", {"id"})
311
+ sbml_dfs.get_table(SBML_DFS.REACTION_SPECIES, {SCHEMA_DEFS.ID})
308
312
 
309
313
 
310
314
  def test_search_by_name(sbml_dfs_metabolism):
311
- assert sbml_dfs_metabolism.search_by_name("atp", "species", False).shape[0] == 1
312
- assert sbml_dfs_metabolism.search_by_name("pyr", "species").shape[0] == 3
313
- assert sbml_dfs_metabolism.search_by_name("kinase", "reactions").shape[0] == 4
315
+ assert (
316
+ sbml_dfs_metabolism.search_by_name("atp", SBML_DFS.SPECIES, False).shape[0] == 1
317
+ )
318
+ assert sbml_dfs_metabolism.search_by_name("pyr", SBML_DFS.SPECIES).shape[0] == 3
319
+ assert (
320
+ sbml_dfs_metabolism.search_by_name("kinase", SBML_DFS.REACTIONS).shape[0] == 4
321
+ )
314
322
 
315
323
 
316
324
  def test_search_by_id(sbml_dfs_metabolism):
317
- identifiers_tbl = sbml_dfs_metabolism.get_identifiers("species")
325
+ identifiers_tbl = sbml_dfs_metabolism.get_identifiers(SBML_DFS.SPECIES)
318
326
  ids, species = sbml_dfs_metabolism.search_by_ids(
319
- ["P40926"], "species", identifiers_tbl
327
+ identifiers_tbl, identifiers=["P40926"]
320
328
  )
321
329
  assert ids.shape[0] == 1
322
330
  assert species.shape[0] == 1
323
331
 
324
332
  ids, species = sbml_dfs_metabolism.search_by_ids(
325
- ["57540", "30744"], "species", identifiers_tbl, {"chebi"}
333
+ identifiers_tbl,
334
+ identifiers=["57540", "30744"],
335
+ ontologies={ONTOLOGIES.CHEBI},
326
336
  )
327
337
  assert ids.shape[0] == 2
328
338
  assert species.shape[0] == 2
329
339
 
330
- ids, species = sbml_dfs_metabolism.search_by_ids(
331
- ["baz"], "species", identifiers_tbl
332
- )
333
- assert ids.shape[0] == 0
334
- assert species.shape[0] == 0
340
+ with pytest.raises(
341
+ ValueError, match="None of the requested identifiers are present"
342
+ ):
343
+ ids, species = sbml_dfs_metabolism.search_by_ids(
344
+ identifiers_tbl, identifiers=["baz"] # Non-existent identifier
345
+ )
335
346
 
336
347
 
337
348
  def test_species_status(sbml_dfs):
338
349
 
339
350
  species = sbml_dfs.species
340
- select_species = species[species["s_name"] == "OxyHbA"]
351
+ select_species = species[species[SBML_DFS.S_NAME] == "OxyHbA"]
341
352
  assert select_species.shape[0] == 1
342
353
 
343
354
  status = sbml_dfs.species_status(select_species.index[0])
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import pandas as pd
4
+ import pytest
4
5
 
5
6
  from napistu import sbml_dfs_utils
6
7
  from napistu.constants import (
@@ -10,6 +11,10 @@ from napistu.constants import (
10
11
  SBML_DFS,
11
12
  IDENTIFIERS,
12
13
  SBOTERM_NAMES,
14
+ VALID_SBO_TERMS,
15
+ VALID_SBO_TERM_NAMES,
16
+ MINI_SBO_FROM_NAME,
17
+ MINI_SBO_TO_NAME,
13
18
  )
14
19
 
15
20
 
@@ -219,3 +224,113 @@ def test_stubbed_compartment():
219
224
  "url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
220
225
  "bqb": "BQB_IS",
221
226
  }
227
+
228
+
229
+ def test_validate_sbo_values_success():
230
+ # Should not raise
231
+ sbml_dfs_utils._validate_sbo_values(pd.Series(VALID_SBO_TERMS), validate="terms")
232
+ sbml_dfs_utils._validate_sbo_values(
233
+ pd.Series(VALID_SBO_TERM_NAMES), validate="names"
234
+ )
235
+
236
+
237
+ def test_validate_sbo_values_invalid_type():
238
+ with pytest.raises(ValueError, match="Invalid validation type"):
239
+ sbml_dfs_utils._validate_sbo_values(
240
+ pd.Series(VALID_SBO_TERMS), validate="badtype"
241
+ )
242
+
243
+
244
+ def test_validate_sbo_values_invalid_value():
245
+ # Add an invalid term
246
+ s = pd.Series(VALID_SBO_TERMS + ["SBO:9999999"])
247
+ with pytest.raises(ValueError, match="unusable SBO terms"):
248
+ sbml_dfs_utils._validate_sbo_values(s, validate="terms")
249
+ # Add an invalid name
250
+ s = pd.Series(VALID_SBO_TERM_NAMES + ["not_a_name"])
251
+ with pytest.raises(ValueError, match="unusable SBO terms"):
252
+ sbml_dfs_utils._validate_sbo_values(s, validate="names")
253
+
254
+
255
+ def test_sbo_constants_internal_consistency():
256
+ # Every term should have a name and vice versa
257
+ # MINI_SBO_FROM_NAME: name -> term, MINI_SBO_TO_NAME: term -> name
258
+ terms_from_names = set(MINI_SBO_FROM_NAME.values())
259
+ names_from_terms = set(MINI_SBO_TO_NAME.values())
260
+ assert terms_from_names == set(VALID_SBO_TERMS)
261
+ assert names_from_terms == set(VALID_SBO_TERM_NAMES)
262
+ # Bijective mapping
263
+ for name, term in MINI_SBO_FROM_NAME.items():
264
+ assert MINI_SBO_TO_NAME[term] == name
265
+ for term, name in MINI_SBO_TO_NAME.items():
266
+ assert MINI_SBO_FROM_NAME[name] == term
267
+
268
+
269
+ def test_infer_entity_type():
270
+ """Test entity type inference with valid keys"""
271
+ # Test entity type inference when the index matches the primary key.
272
+ # Test compartments with index as primary key
273
+ df = pd.DataFrame(
274
+ {SBML_DFS.C_NAME: ["cytoplasm"], SBML_DFS.C_IDENTIFIERS: ["GO:0005737"]}
275
+ )
276
+ df.index.name = SBML_DFS.C_ID
277
+ result = sbml_dfs_utils.infer_entity_type(df)
278
+ assert result == SBML_DFS.COMPARTMENTS
279
+
280
+ # Test species with index as primary key
281
+ df = pd.DataFrame(
282
+ {SBML_DFS.S_NAME: ["glucose"], SBML_DFS.S_IDENTIFIERS: ["CHEBI:17234"]}
283
+ )
284
+ df.index.name = SBML_DFS.S_ID
285
+ result = sbml_dfs_utils.infer_entity_type(df)
286
+ assert result == SBML_DFS.SPECIES
287
+
288
+ # Test entity type inference by exact column matching.
289
+ # Test compartmentalized_species (has foreign keys)
290
+ df = pd.DataFrame(
291
+ {
292
+ SBML_DFS.SC_ID: ["glucose_c"],
293
+ SBML_DFS.S_ID: ["glucose"],
294
+ SBML_DFS.C_ID: ["cytoplasm"],
295
+ }
296
+ )
297
+ result = sbml_dfs_utils.infer_entity_type(df)
298
+ assert result == "compartmentalized_species"
299
+
300
+ # Test reaction_species (has foreign keys)
301
+ df = pd.DataFrame(
302
+ {
303
+ SBML_DFS.RSC_ID: ["rxn1_glc"],
304
+ SBML_DFS.R_ID: ["rxn1"],
305
+ SBML_DFS.SC_ID: ["glucose_c"],
306
+ }
307
+ )
308
+ result = sbml_dfs_utils.infer_entity_type(df)
309
+ assert result == SBML_DFS.REACTION_SPECIES
310
+
311
+ # Test reactions (only primary key)
312
+ df = pd.DataFrame({SBML_DFS.R_ID: ["rxn1"]})
313
+ result = sbml_dfs_utils.infer_entity_type(df)
314
+ assert result == SBML_DFS.REACTIONS
315
+
316
+
317
+ def test_infer_entity_type_errors():
318
+ """Test error cases for entity type inference."""
319
+ # Test no matching entity type
320
+ df = pd.DataFrame({"random_column": ["value"], "another_col": ["data"]})
321
+ with pytest.raises(ValueError, match="No entity type matches DataFrame"):
322
+ sbml_dfs_utils.infer_entity_type(df)
323
+
324
+ # Test partial match (missing required foreign key)
325
+ df = pd.DataFrame(
326
+ {SBML_DFS.SC_ID: ["glucose_c"], SBML_DFS.S_ID: ["glucose"]}
327
+ ) # Missing c_id
328
+ with pytest.raises(ValueError):
329
+ sbml_dfs_utils.infer_entity_type(df)
330
+
331
+ # Test extra primary keys that shouldn't be there
332
+ df = pd.DataFrame(
333
+ {SBML_DFS.R_ID: ["rxn1"], SBML_DFS.S_ID: ["glucose"]}
334
+ ) # Two primary keys
335
+ with pytest.raises(ValueError):
336
+ sbml_dfs_utils.infer_entity_type(df)
tests/test_utils.py CHANGED
@@ -2,9 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  import gzip
4
4
  import os
5
+ import tempfile
5
6
  from datetime import datetime
6
- from unittest.mock import Mock
7
- from unittest.mock import patch
7
+ from pathlib import Path
8
+ from unittest.mock import Mock, patch
8
9
 
9
10
  import numpy as np
10
11
  import pandas as pd
@@ -705,3 +706,26 @@ def test_update_pathological_names():
705
706
  s3 = pd.Series(["foo", "bar", "baz"])
706
707
  out3 = utils.update_pathological_names(s3, "prefix_")
707
708
  assert list(out3) == ["foo", "bar", "baz"]
709
+
710
+
711
+ def test_parquet_save_load():
712
+ """Test that save_parquet and load_parquet round-trip a DataFrame unchanged."""
713
+ # Create test DataFrame
714
+ original_df = pd.DataFrame(
715
+ {
716
+ "sc_id_origin": ["A", "B", "C"],
717
+ "sc_id_dest": ["B", "C", "A"],
718
+ "path_length": [1, 2, 3],
719
+ "path_weights": [0.1, 0.5, 0.8],
720
+ "has_connection": [True, False, True],
721
+ }
722
+ )
723
+
724
+ # Write and read using temporary file
725
+ with tempfile.TemporaryDirectory() as temp_dir:
726
+ file_path = Path(temp_dir) / "test.parquet"
727
+ utils.save_parquet(original_df, file_path)
728
+ result_df = utils.load_parquet(file_path)
729
+
730
+ # Verify they're identical
731
+ pd.testing.assert_frame_equal(original_df, result_df)