cavapy: cavapy-0.2.0-py3-none-any.whl → cavapy-0.3.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cavapy might be problematic. (The original page linked to more details here; the link target is not preserved in this text export.)

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cavapy
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: CAVA Python package. Retrive and analyze climate data.
5
5
  License: MIT
6
6
  Author: Riccardo Soldan
@@ -127,6 +127,6 @@ Togo_climate_data = cavapy.get_climate_data(country="Togo", variables=["tasmax",
127
127
 
128
128
  ```
129
129
  import cavapy
130
- Togo_climate_data = cavapy.get_climate_data(country="Togo", cordex_domain="AFR-22",variables=["tasmax", "pr"], rcp="rcp26", gcm="MPI", rcm="REMO", years_up_to=2030, obs=True, bias_correction=True, historical=True, years_obs=range(1980,2019))
130
+ Togo_climate_data = cavapy.get_climate_data(country="Togo", variables=["tasmax", "pr"], obs=True, years_obs=range(1980,2019))
131
131
  ```
132
132
 
@@ -0,0 +1,5 @@
1
+ cavapy.py,sha256=5dOcnnY7tKG40SYY0AmD5Eexn---NJGog3d8ZH6DoXA,29630
2
+ cavapy-0.3.0.dist-info/LICENSE,sha256=1etyG4_n-Tb3yoNMwQ38g_WxXFQ4E_ZCjZc-AGYPc9U,1151
3
+ cavapy-0.3.0.dist-info/METADATA,sha256=Z8uDX1Y5dnc_vmKSVydZqukjyfK83PpYrLqAhJeXSAU,6953
4
+ cavapy-0.3.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
5
+ cavapy-0.3.0.dist-info/RECORD,,
cavapy.py CHANGED
@@ -420,6 +420,70 @@ def _validate_cordex_domain(xlim, ylim, cordex_domain):
420
420
  )
421
421
 
422
422
 
423
+ def _leave_one_out_bias_correction(ref, hist, variable, log):
424
+ """
425
+ Perform leave-one-out cross-validation for bias correction to avoid overfitting.
426
+
427
+ Args:
428
+ ref: Reference (observational) data
429
+ hist: Historical model data
430
+ variable: Variable name for determining correction method
431
+ log: Logger instance
432
+
433
+ Returns:
434
+ xr.DataArray: Bias-corrected historical data
435
+ """
436
+ log.info("Starting leave-one-out cross-validation for bias correction")
437
+
438
+ # Get unique years from historical data
439
+ hist_years = hist.time.dt.year.values
440
+ unique_years = np.unique(hist_years)
441
+
442
+ # Initialize list to store corrected data for each year
443
+ corrected_years = []
444
+
445
+ for leave_out_year in unique_years:
446
+ log.info(f"Processing leave-out year: {leave_out_year}")
447
+
448
+ # Create masks for training (all years except leave_out_year) and testing (only leave_out_year)
449
+ train_mask = hist.time.dt.year != leave_out_year
450
+ test_mask = hist.time.dt.year == leave_out_year
451
+
452
+ # Get training data (all years except the current one)
453
+ hist_train = hist.sel(time=train_mask)
454
+ hist_test = hist.sel(time=test_mask)
455
+
456
+ # Get corresponding reference data for training period
457
+ ref_train_mask = ref.time.dt.year != leave_out_year
458
+ ref_train = ref.sel(time=ref_train_mask)
459
+
460
+ # Train the bias correction model on the training data
461
+ QM_leave_out = sdba.EmpiricalQuantileMapping.train(
462
+ ref_train,
463
+ hist_train,
464
+ group="time.month",
465
+ kind="*" if variable in ["pr", "rsds", "sfcWind"] else "+",
466
+ )
467
+
468
+ # Apply bias correction to the left-out year
469
+ hist_corrected_year = QM_leave_out.adjust(
470
+ hist_test, extrapolation="constant", interp="linear"
471
+ )
472
+
473
+ # Apply variable-specific constraints
474
+ if variable == "hurs":
475
+ hist_corrected_year = hist_corrected_year.where(hist_corrected_year <= 100, 100)
476
+ hist_corrected_year = hist_corrected_year.where(hist_corrected_year >= 0, 0)
477
+
478
+ corrected_years.append(hist_corrected_year)
479
+
480
+ # Concatenate all corrected years and sort by time
481
+ hist_bs = xr.concat(corrected_years, dim="time").sortby("time")
482
+
483
+ log.info("Leave-one-out cross-validation bias correction completed")
484
+ return hist_bs
485
+
486
+
423
487
  def process_worker(num_threads, **kwargs) -> xr.DataArray:
424
488
  variable = kwargs["variable"]
425
489
  log = logger.getChild(variable)
@@ -514,20 +578,24 @@ def _climate_data_for_variable(
514
578
  if bias_correction and historical:
515
579
  # Load observations for bias correction
516
580
  ref = future_obs.result()
517
- log.info("Training eqm with historical data")
581
+ log.info("Training eqm with leave-one-out cross-validation")
582
+
583
+ # Use leave-one-out cross-validation for historical bias correction
584
+ hist_bs = _leave_one_out_bias_correction(ref, hist, variable, log)
585
+
586
+ # For projections, train on all historical data
518
587
  QM_mo = sdba.EmpiricalQuantileMapping.train(
519
588
  ref,
520
589
  hist,
521
590
  group="time.month",
522
591
  kind="*" if variable in ["pr", "rsds", "sfcWind"] else "+",
523
592
  )
524
- log.info("Performing bias correction with eqm")
525
- hist_bs = QM_mo.adjust(hist, extrapolation="constant", interp="linear")
593
+ log.info("Performing bias correction on projections with full historical training")
526
594
  proj_bs = QM_mo.adjust(proj, extrapolation="constant", interp="linear")
527
595
  log.info("Done!")
528
596
  if variable == "hurs":
529
- hist_bs = hist_bs.where(hist_bs <= 100, 100)
530
- hist_bs = hist_bs.where(hist_bs >= 0, 0)
597
+ proj_bs = proj_bs.where(proj_bs <= 100, 100)
598
+ proj_bs = proj_bs.where(proj_bs >= 0, 0)
531
599
  combined = xr.concat([hist_bs, proj_bs], dim="time")
532
600
  return combined
533
601
 
@@ -694,7 +762,7 @@ def _download_data(
694
762
 
695
763
 
696
764
  if __name__ == "__main__":
697
- # Example 1: Get observational data (simplified syntax)
765
+ # Example 1: Get observational data
698
766
  print("Getting observational data...")
699
767
  obs_data = get_climate_data(
700
768
  country="Togo",
@@ -704,7 +772,7 @@ if __name__ == "__main__":
704
772
  )
705
773
  print("Observational data keys:", list(obs_data.keys()))
706
774
 
707
- # Example 2: Get CORDEX projection data
775
+ # Example 2: Get CORDEX bc projection data and bc historical data
708
776
  print("\nGetting CORDEX projection data...")
709
777
  proj_data = get_climate_data(
710
778
  country="Togo",
@@ -713,6 +781,8 @@ if __name__ == "__main__":
713
781
  rcp="rcp26",
714
782
  gcm="MPI",
715
783
  rcm="Reg",
716
- years_up_to=2030,
784
+ years_up_to=2010,
785
+ historical=True,
786
+ bias_correction=True
717
787
  )
718
788
  print("Projection data keys:", list(proj_data.keys()))
@@ -1,5 +0,0 @@
1
- cavapy.py,sha256=LxsUXZoe_bIQ8rXXrf_GRhvC-RxTVTjLddgrIU8n4qY,26848
2
- cavapy-0.2.0.dist-info/LICENSE,sha256=1etyG4_n-Tb3yoNMwQ38g_WxXFQ4E_ZCjZc-AGYPc9U,1151
3
- cavapy-0.2.0.dist-info/METADATA,sha256=byD0GIxS5D_mT-BDxehvV6Vjf_U6d8tgiYcrBLppbcM,7068
4
- cavapy-0.2.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
5
- cavapy-0.2.0.dist-info/RECORD,,
File without changes