metameq 2026.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,657 @@
1
+ import numpy as np
2
+ import pandas
3
+ from pandas.testing import assert_frame_equal
4
+ from unittest import TestCase
5
+ from metameq.src.metadata_merger import _check_for_nans, \
6
+ _check_for_duplicate_field_vals, _validate_merge, \
7
+ merge_many_to_one_metadata, merge_one_to_one_metadata, \
8
+ merge_sample_and_subject_metadata, find_common_col_names, \
9
+ find_common_df_cols
10
+
11
+
12
class TestMetadataMerger(TestCase):
    """Unit tests for the functions imported from metameq.src.metadata_merger.

    The private static builders at the top of the class create the small
    DataFrames that recur across tests, so each test only spells out the
    values that differ from the baseline frame.
    """

    # ------------------------------------------------------------------
    # Shared fixture builders (names start with "_" so they are not
    # collected as tests)
    # ------------------------------------------------------------------
    @staticmethod
    def _lettered_left(ids=("x", "y", "z")):
        """Return a 3-row frame with columns "id", "a" and "b"."""
        return pandas.DataFrame({
            "id": list(ids),
            "a": [1, 2, 3],
            "b": [4, 5, 6],
        })

    @staticmethod
    def _lettered_right(names=("x", "y", "z"), d=(10, 11, 12)):
        """Return a 3-row frame with columns "name", "c" and "d"."""
        return pandas.DataFrame({
            "name": list(names),
            "c": [7, 8, 9],
            "d": list(d),
        })

    @staticmethod
    def _lettered_right_4(names=("x", "y", "z", "q")):
        """Return a 4-row frame with columns "name", "c" and "d"."""
        return pandas.DataFrame({
            "name": list(names),
            "c": [7, 8, 9, 90],
            "d": [10, 11, 12, 120],
        })

    @staticmethod
    def _many_side(names=("x", "y", "z", "x"), **extra_cols):
        """Return a 4-row frame with columns "id", "name", "a" and "b".

        Any keyword arguments are appended, in order, as additional columns;
        this is used to build the expected result of a many-to-one merge.
        """
        columns = {
            "id": [101, 102, 103, 104],
            "name": list(names),
            "a": [1, 2, 3, 4],
            "b": [5, 6, 7, 8],
        }
        columns.update(extra_cols)
        return pandas.DataFrame(columns)

    @staticmethod
    def _one_side(names=("x", "y", "z")):
        """Return a 3-row frame with columns "name", "c" and "d"."""
        return pandas.DataFrame({
            "name": list(names),
            "c": [9, 10, 11],
            "d": [12, 13, 14],
        })

    # ------------------------------------------------------------------
    # _check_for_nans
    # ------------------------------------------------------------------
    def test__check_for_nans_wo_nans(self):
        """A NaN-free column yields an empty list.

        Column "b" contains a NaN, but only column "a" is checked.
        """
        frame = pandas.DataFrame({"a": [1, 2, 3], "b": [4, np.nan, 6]})

        self.assertEqual([], _check_for_nans(frame, "test", "a"))

    def test__check_for_nans_w_nans(self):
        """A column containing a NaN yields a one-message list."""
        frame = pandas.DataFrame({
            "a": [1, np.nan, 3],
            "b": [4, np.nan, 6],
        })
        expected = ["'test' metadata has NaNs in column 'b'"]

        self.assertEqual(expected, _check_for_nans(frame, "test", "b"))

    def test__check_for_nans_with_empty(self):
        """Checking a column of an empty DataFrame raises an exception."""
        self.assertRaises(
            Exception, _check_for_nans, pandas.DataFrame(), "test", "a")

    # ------------------------------------------------------------------
    # _check_for_duplicate_field_vals
    # ------------------------------------------------------------------
    def test__check_for_duplicate_field_vals(self):
        """A duplicate-free column yields an empty list.

        Column "b" contains a repeated value, but only column "a" is checked.
        """
        frame = pandas.DataFrame({"a": [1, 2, 3], "b": [4, 5, 5]})

        self.assertEqual(
            [], _check_for_duplicate_field_vals(frame, "test", "a"))

    def test__check_for_duplicate_field_vals_w_duplicates(self):
        """A column with repeated values yields a message naming them."""
        frame = pandas.DataFrame({
            "a": [1, 2, 2, 3, 1],
            "b": [4, 5, 6, 6, 4],
        })
        expected = [
            "'test' metadata has duplicates of the following "
            "values in column 'a': [1 2]"
        ]

        self.assertEqual(
            expected, _check_for_duplicate_field_vals(frame, "test", "a"))

    def test_check_for_duplicate_field_vals_with_empty(self):
        """An empty DataFrame yields an empty list of duplicate messages."""
        messages = _check_for_duplicate_field_vals(
            pandas.DataFrame(), "test", "col")

        self.assertEqual(messages, [])

    # ------------------------------------------------------------------
    # _validate_merge
    # ------------------------------------------------------------------
    def test__validate_merge(self):
        """Validation passes silently for clean frames and present columns."""
        left = self._lettered_left()
        right = self._lettered_right()

        _validate_merge(left, right, "a", "c")
        # Reaching this line means the call above raised nothing.
        self.assertTrue(True)

    def test__validate_merge_err_left_col(self):
        """A merge column absent from the left frame raises ValueError."""
        left = self._lettered_left()
        right = self._lettered_right()
        pattern = r"left metadata missing merge column: \['c'\]"

        # "c" exists only in the right frame
        with self.assertRaisesRegex(ValueError, pattern):
            _validate_merge(left, right, "c", "c")

    def test__validate_merge_err_right_col(self):
        """A merge column absent from the right frame raises ValueError."""
        left = self._lettered_left()
        right = self._lettered_right()
        pattern = r"right metadata missing merge column: \['a'\]"

        # "a" exists only in the left frame
        with self.assertRaisesRegex(ValueError, pattern):
            _validate_merge(left, right, "a", "a")

    def test__validate_merge_err_msgs(self):
        """NaN and duplicate problems on both sides are reported together.

        Each merge column holds one NaN and one repeated value, and the
        expected error text lists all four problems.
        """
        left = self._lettered_left(ids=["x", np.nan, "x"])
        right = self._lettered_right(names=[np.nan, "y", "y"])

        # The continuation lines of this literal deliberately start at
        # column 0: any indentation would become part of the regex.
        exp_msg = r"""Errors in metadata to merge:
'left' metadata has NaNs in column 'id'
'right' metadata has NaNs in column 'name'
'left' metadata has duplicates of the following values in column 'id': \['x'\]
'right' metadata has duplicates of the following values in column 'name': \['y'\]"""  # noqa E501

        with self.assertRaisesRegex(ValueError, exp_msg):
            _validate_merge(left, right, "id", "name")

    # Only a handful of join variations are exercised for
    # merge_one_to_one_metadata: pandas' own merge is already well tested,
    # so these tests just show the function passes the right parameters on.

    # ------------------------------------------------------------------
    # merge_one_to_one_metadata
    # ------------------------------------------------------------------
    def test_merge_one_to_one_metadata_left(self):
        """Merging without an explicit join type keeps only the left rows.

        The right frame has an extra "q" row that must not appear in the
        result.
        """
        left = self._lettered_left()
        right = self._lettered_right_4()

        observed = merge_one_to_one_metadata(left, right, "id", "name")

        expected = pandas.DataFrame({
            "id": ["x", "y", "z"],
            "a": [1, 2, 3],
            "b": [4, 5, 6],
            "name": ["x", "y", "z"],
            "c": [7, 8, 9],
            "d": [10, 11, 12],
        })
        assert_frame_equal(observed, expected)

    def test_merge_one_to_one_metadata_err(self):
        """A NaN in the right merge column raises, using the custom set name.

        This is a smoke test that validation errors propagate; it does not
        cover every possible validation failure.
        """
        left = self._lettered_left()
        right = self._lettered_right(names=["x", np.nan, "z"])
        pattern = (r"Errors in metadata to merge:\n'second' metadata "
                   r"has NaNs in column 'name'")

        with self.assertRaisesRegex(ValueError, pattern):
            merge_one_to_one_metadata(
                left, right, "id", "name",
                set_name_left="first", set_name_right="second")

    # ------------------------------------------------------------------
    # merge_many_to_one_metadata
    # ------------------------------------------------------------------
    def test_merge_many_to_one_metadata(self):
        """Several "many" rows sharing a key each receive the "one" row.

        Rows 101 and 104 both carry name "x" and so both get c=9, d=12.
        """
        many = self._many_side()
        one = self._one_side()

        observed = merge_many_to_one_metadata(many, one, "name", "name")

        expected = self._many_side(c=[9, 10, 11, 9], d=[12, 13, 14, 12])
        assert_frame_equal(observed, expected)

    def test_merge_many_to_one_metadata_err(self):
        """A NaN in the "one" merge column raises, using the custom set name.

        This is a smoke test that validation errors propagate; it does not
        cover every possible validation failure.
        """
        many = self._many_side()
        one = self._one_side(names=["x", "y", np.nan])
        pattern = (r"Errors in metadata to merge:\n'uno' metadata has "
                   r"NaNs in column 'name'")

        with self.assertRaisesRegex(ValueError, pattern):
            merge_many_to_one_metadata(
                many, one, "name",
                set_name_many="lots", set_name_one="uno")

    # ------------------------------------------------------------------
    # merge_sample_and_subject_metadata
    # ------------------------------------------------------------------
    def test_merge_sample_and_subject_metadata(self):
        """Each sample row gains the columns of its matching subject row."""
        samples = self._many_side()
        subjects = self._one_side()

        observed = merge_sample_and_subject_metadata(
            samples, subjects, "name")

        expected = self._many_side(c=[9, 10, 11, 9], d=[12, 13, 14, 12])
        assert_frame_equal(observed, expected)

    def test_merge_sample_and_subject_metadata_err(self):
        """A NaN in the subject merge column raises a 'subject' error."""
        samples = self._many_side()
        subjects = self._one_side(names=["x", "y", np.nan])
        pattern = (r"Errors in metadata to merge:\n'subject' metadata has "
                   r"NaNs in column 'name'")

        with self.assertRaisesRegex(ValueError, pattern):
            merge_sample_and_subject_metadata(samples, subjects, "name")

    # ------------------------------------------------------------------
    # find_common_col_names
    # ------------------------------------------------------------------
    def test_find_common_col_names(self):
        """Names present in both lists are returned."""
        first = ["col1", "col2", "col3"]
        second = ["col2", "col3", "col4"]

        self.assertEqual(
            find_common_col_names(first, second), ["col2", "col3"])

    def test_find_common_col_names_empty(self):
        """Two empty lists have no common names."""
        self.assertEqual(find_common_col_names([], []), [])

    def test_find_common_col_names_no_common(self):
        """Disjoint lists have no common names."""
        first = ["col1", "col2"]
        second = ["col3", "col4"]

        self.assertEqual(find_common_col_names(first, second), [])

    def test_find_common_col_names_case_sensitive(self):
        """Names differing only by letter case are not treated as common."""
        first = ["Col1", "col2"]
        second = ["col1", "Col2"]

        self.assertEqual(find_common_col_names(first, second), [])

    # ------------------------------------------------------------------
    # find_common_df_cols
    # ------------------------------------------------------------------
    def test_find_common_df_cols(self):
        """Columns present in both DataFrames are returned."""
        first = pandas.DataFrame({
            "col1": [1, 2],
            "col2": [3, 4],
            "col3": [5, 6],
        })
        second = pandas.DataFrame({
            "col2": [7, 8],
            "col3": [9, 10],
            "col4": [11, 12],
        })

        self.assertEqual(
            find_common_df_cols(first, second), ["col2", "col3"])

    def test_find_common_df_cols_empty(self):
        """Two empty DataFrames have no common columns."""
        common = find_common_df_cols(pandas.DataFrame(), pandas.DataFrame())

        self.assertEqual(common, [])

    def test_find_common_df_cols_no_common(self):
        """DataFrames with disjoint columns have no common columns."""
        first = pandas.DataFrame({"col1": [1, 2], "col2": [3, 4]})
        second = pandas.DataFrame({"col3": [5, 6], "col4": [7, 8]})

        self.assertEqual(find_common_df_cols(first, second), [])

    # ------------------------------------------------------------------
    # merge_one_to_one_metadata (further join types and error paths)
    # ------------------------------------------------------------------
    def test_merge_one_to_one_metadata_right(self):
        """A right join keeps every right row, NaN-filling unmatched ones.

        The right-only "q" row appears in the result with NaN in the
        left-hand columns.
        """
        left = self._lettered_left()
        right = self._lettered_right_4()

        observed = merge_one_to_one_metadata(
            left, right, "id", "name", join_type="right")

        expected = pandas.DataFrame({
            "id": ["x", "y", "z", np.nan],
            "a": [1, 2, 3, np.nan],
            "b": [4, 5, 6, np.nan],
            "name": ["x", "y", "z", "q"],
            "c": [7, 8, 9, 90],
            "d": [10, 11, 12, 120],
        })
        assert_frame_equal(observed, expected)

    def test_merge_one_to_one_metadata_inner(self):
        """An inner join keeps only the keys found in both frames.

        Left-only "z" and right-only "q" are both dropped.
        """
        left = self._lettered_left()
        right = pandas.DataFrame({
            "name": ["x", "y", "q"],
            "c": [7, 8, 90],
            "d": [10, 11, 120],
        })

        observed = merge_one_to_one_metadata(
            left, right, "id", "name", join_type="inner")

        expected = pandas.DataFrame({
            "id": ["x", "y"],
            "a": [1, 2],
            "b": [4, 5],
            "name": ["x", "y"],
            "c": [7, 8],
            "d": [10, 11],
        })
        assert_frame_equal(observed, expected)

    def test_merge_one_to_one_metadata_with_nans(self):
        """A NaN in the right frame's merge column raises ValueError.

        Only the left merge column name is passed here; both frames have
        an "id" column.
        """
        left = self._lettered_left()
        right = pandas.DataFrame({
            "id": ["x", "y", np.nan],
            "c": [7, 8, 90],
            "d": [10, 11, 120],
        })

        self.assertRaises(
            ValueError, merge_one_to_one_metadata, left, right, "id")

    def test_merge_one_to_one_metadata_with_duplicates(self):
        """A repeated value in the right merge column raises ValueError."""
        left = self._lettered_left()
        right = self._lettered_right_4(names=["x", "y", "z", "y"])

        self.assertRaises(
            ValueError, merge_one_to_one_metadata, left, right, "id", "name")

    # ------------------------------------------------------------------
    # merge_many_to_one_metadata (repeated keys on the "many" side)
    # ------------------------------------------------------------------
    def test_merge_many_to_one_metadata_with_duplicates(self):
        """Repeated keys on the "many" side are accepted and merged.

        Name "x" occurs twice in the "many" frame and both rows receive
        the same "one" values.
        """
        many = self._many_side()
        one = self._one_side()

        observed = merge_many_to_one_metadata(many, one, "name", "name")

        expected = self._many_side(c=[9, 10, 11, 9], d=[12, 13, 14, 12])
        assert_frame_equal(observed, expected)

    # ------------------------------------------------------------------
    # merge_sample_and_subject_metadata (sample without a subject)
    # ------------------------------------------------------------------
    def test_merge_sample_and_subject_metadata_with_missing(self):
        """A sample with no matching subject gets NaN subject columns.

        Sample "w" has no subject row, so its "c" and "d" are NaN.
        """
        sample_names = ["x", "y", "z", "w"]
        samples = self._many_side(names=sample_names)
        subjects = self._one_side()

        observed = merge_sample_and_subject_metadata(
            samples, subjects, "name")

        expected = self._many_side(
            names=sample_names,
            c=[9, 10, 11, np.nan],
            d=[12, 13, 14, np.nan])
        assert_frame_equal(observed, expected)

    # ------------------------------------------------------------------
    # _validate_merge (additional cases)
    # ------------------------------------------------------------------
    def test_validate_merge(self):
        """A NaN in a non-merge column does not fail validation.

        The test passes as long as the call raises nothing.
        """
        left = self._lettered_left()
        # the NaN sits in "d", which is not a merge column
        right = self._lettered_right(d=[10, np.nan, 12])

        _validate_merge(left, right, "id", "name")

    def test_validate_merge_non_existent_col(self):
        """Naming a left merge column that does not exist raises ValueError."""
        left = self._lettered_left()
        right = self._lettered_right(d=[10, np.nan, 12])

        # "subject_id" is not a column of the left frame
        self.assertRaises(
            ValueError, _validate_merge, left, right, "subject_id", "name")

    def test_validate_merge_with_nans(self):
        """NaNs in both merge columns raise the aggregated ValueError."""
        left = self._lettered_left(ids=["x", np.nan, "z"])
        right = self._lettered_right(names=["x", "y", np.nan])

        with self.assertRaisesRegex(ValueError, "Errors in metadata to merge"):
            _validate_merge(left, right, "id", "name")