PyPI - b3alien - Versions diffs - 0.2.2__tar.gz → 0.4.0__tar.gz - Mend

b3alien 0.2.2.tar.gz → 0.4.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

{b3alien-0.2.2 → b3alien-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: b3alien
-Version: 0.2.2
+Version: 0.4.0
 Summary: Calculating the CBD target 6.1 indicator from occurrence cubes
 Author-email: Maarten Trekels <maarten.trekels@plantentuinmeise.be>
 License: MIT
@@ -229,14 +229,14 @@ _, vec1 = simulation.simulate_solow_costello_scipy(time, rate, vis=True)
-The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the rate of establishment (2nd parameter).
+The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the change in rate of establishment (2nd parameter).
 ```python
-print("Fitted Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
+print("Fitted change in Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
 ```
-    Fitted Rate of Establishment from the data cube: -0.025016351861057464/year
+    Fitted change in Rate of Establishment from the data cube: -0.025016351861057464/year
 ### Step 5: Determine the error margings on the fitted rate of establishment

{b3alien-0.2.2 → b3alien-0.4.0}/README.md RENAMED Viewed

@@ -193,14 +193,14 @@ _, vec1 = simulation.simulate_solow_costello_scipy(time, rate, vis=True)
-The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the rate of establishment (2nd parameter).
+The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the change in rate of establishment (2nd parameter).
 ```python
-print("Fitted Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
+print("Fitted change in Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
 ```
-    Fitted Rate of Establishment from the data cube: -0.025016351861057464/year
+    Fitted change in Rate of Establishment from the data cube: -0.025016351861057464/year
 ### Step 5: Determine the error margings on the fitted rate of establishment

{b3alien-0.2.2 → b3alien-0.4.0}/b3alien/simulation/__init__.py RENAMED Viewed

@@ -10,10 +10,14 @@ from .simulation import simulate_solow_costello
 from .simulation import simulate_solow_costello_scipy
 from .simulation import parallel_bootstrap_solow_costello
 from .simulation import plot_with_confidence
+from .simulation import get_bootstrap_errors
+from .simulation import run_bootstrap_analysis
 __all__ = [
     "simulate_solow_costello",
     "simulate_solow_costello_scipy",
     "parallel_bootstrap_solow_costello",
-    "plot_with_confidence"
+    "plot_with_confidence",
+    "get_bootstrap_errors",
+    "run_bootstrap_analysis",
 ]

{b3alien-0.2.2 → b3alien-0.4.0}/b3alien/simulation/simulation.py RENAMED Viewed

@@ -206,13 +206,72 @@ def simulate_solow_costello_scipy(annual_time_gbif, annual_rate_gbif, vis=False)
     return C1, vec1
+import numpy as np
+import pandas as pd
+def get_bootstrap_errors(annual_time, annual_rate, iterations=100):
+    """
+    Perform bootstrap resampling to estimate the standard errors of the parameters and C1 values.
+    Parameters
+    ----------
+    annual_time: pandas Series
+        Time points
+    annual_rate: pandas Series
+        Rates corresponding to the time points
+    iterations: int
+        Number of bootstrap iterations to perform
+    Returns
+    -------
+    vec1_mean
+        mean of the fitted parameters across bootstrap samples
+    vec1_std
+        standard error of the fitted parameters across bootstrap samples
+    C1_mean:
+        mean of the C1 values across bootstrap samples
+    C1_std:
+        standard error of the C1 values across bootstrap samples
+    """
+    all_vec1 = []
+    all_C1 = []
+    print(f"Starting {iterations} bootstrap iterations...")
+    for i in range(iterations):
+        # 1. Resample the data with replacement
+        # This simulates alternative 'histories' of the same process
+        indices = np.random.choice(len(annual_rate), size=len(annual_rate), replace=True)
+        resampled_time = annual_time.iloc[indices].sort_values()
+        resampled_rate = annual_rate.iloc[indices] # Keep rates associated with their times
+        try:
+            # 2. Run your existing fitting function
+            C1_boot, vec1_boot = simulate_solow_costello_scipy(resampled_time, resampled_rate, vis=False)
+            all_vec1.append(vec1_boot)
+            all_C1.append(C1_boot)
+        except Exception as e:
+            # Skip iterations that fail to converge
+            continue
+    # Convert to numpy arrays for easier math
+    all_vec1 = np.array(all_vec1)
+    all_C1 = np.array(all_C1)
+    # 3. Calculate means and standard errors
+    vec1_mean = np.mean(all_vec1, axis=0)
+    vec1_std = np.std(all_vec1, axis=0)
+    C1_mean = np.mean(all_C1, axis=0)
+    C1_std = np.std(all_C1, axis=0)
+    return vec1_mean, vec1_std, C1_mean, C1_std
 def bootstrap_worker(i, time_list, rate_list):
-    '''
-    Bootstrap on the residuals
-    Returns=
-    - fitting parameters (vec1)
-    - C1_sim cumulative prediction from refit
-    '''
+    """
+    Worker function for bootstrap analysis. Each worker will perform one bootstrap iteration.
+    Returns the fitted parameter and cumulative curve for that iteration.
+    """
     time_series = pd.Series(time_list)
     rate_series = pd.Series(rate_list)
@@ -237,6 +296,83 @@ def bootstrap_worker(i, time_list, rate_list):
     except Exception:
         return None
+def run_bootstrap_analysis(time_list, rate_list, n_iterations=200):
+    """
+    Run the bootstrap analysis in parallel and aggregate results into a DataFrame.
+    Parameters
+    ----------
+    time_list: list or pandas Series
+        Time points for the analysis.
+    rate_list: list or pandas Series
+        Rates corresponding to the time points.
+    n_iterations: int
+        Number of bootstrap iterations to perform.
+    Returns
+    -------
+    pandas DataFrame
+        A DataFrame containing the mean annual rates, cumulative values, and confidence intervals.
+    """
+    param_samples = []
+    cumulative_samples = []
+    print(f"Starting {n_iterations} bootstrap iterations...")
+    # 1. Parallel Execution
+    with ProcessPoolExecutor() as executor:
+        # Passing time and rate lists to every worker
+        futures = [executor.submit(bootstrap_worker, i, time_list, rate_list) for i in range(n_iterations)]
+        for f in futures:
+            res = f.result()
+            if res is not None:
+                p_val, cum_vals = res
+                param_samples.append(p_val)
+                cumulative_samples.append(cum_vals)
+    if not param_samples:
+        raise RuntimeError("All bootstrap iterations failed. No valid samples to analyze.")
+    # Convert to numpy arrays
+    param_samples = np.array(param_samples)
+    cumulative_samples = np.array(cumulative_samples)
+    # 2. Extract Annual Rates (Deltas) from Cumulative Samples
+    # Since worker returns np.cumsum(C1_sim), we take the difference to get C1 back
+    rate_samples = np.diff(cumulative_samples, axis=1, prepend=0)
+    # 3. Calculate Statistics for Rates (Deltas)
+    rate_mean = np.mean(rate_samples, axis=0)
+    rate_std = np.std(rate_samples, axis=0)
+    # 4. Calculate Statistics for Cumulative
+    cum_mean = np.mean(cumulative_samples, axis=0)
+    cum_std = np.std(cumulative_samples, axis=0)
+    # 5. Print the Fit Parameter (vec1[1]) with error
+    p_mean = np.mean(param_samples)
+    p_std = np.std(param_samples)
+    print("\n" + "="*30)
+    print("FITTING PARAMETERS RESULTS")
+    print("="*30)
+    print(f"Parameter vec1[1]: {p_mean:.6f} ± {p_std:.6f}")
+    print("="*30 + "\n")
+    # 6. Build the Resulting DataFrame
+    df_results = pd.DataFrame({
+        'Year': time_list,
+        'Annual_Rate': rate_mean,
+        'Annual_Rate_Error': rate_std,
+        'Cumulative_Value': cum_mean,
+        'Cumulative_Error': cum_std,
+        'Lower_CI_95': rate_mean - (1.96 * rate_std),
+        'Upper_CI_95': rate_mean + (1.96 * rate_std)
+    })
+    return df_results
 def parallel_bootstrap_solow_costello(annual_time_gbif, annual_rate_gbif, n_iterations=1000, ci=95):
     """
         Perform parallel bootstrapping of the Solow-Costello model
@@ -303,6 +439,7 @@ def parallel_bootstrap_solow_costello(annual_time_gbif, annual_rate_gbif, n_iter
         "c1_all": c1_curves
     }
 def plot_with_confidence(T, observed, results):
     """
         Plot the observed cumulative discoveries

{b3alien-0.2.2 → b3alien-0.4.0}/b3alien/utils/geo.py RENAMED Viewed

@@ -27,7 +27,7 @@ def to_geoparquet(csvFile, geoFile, leftID='eqdcellcode', rightID='cellCode', ex
     data = pd.read_csv(csvFile, sep='\t')
     geoRef = gpd.read_file(geoFile, engine='pyogrio', use_arrow=True, crs="EPSG:4326")
-    test_merge = pd.merge(data, qdgc_ref, left_on=leftID, right_on=rightID)
+    test_merge = pd.merge(data, geoRef, left_on=leftID, right_on=rightID)
     gdf = gpd.GeoDataFrame(test_merge, geometry='geometry')
     if gdf.crs is None:

{b3alien-0.2.2 → b3alien-0.4.0}/b3alien.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: b3alien
-Version: 0.2.2
+Version: 0.4.0
 Summary: Calculating the CBD target 6.1 indicator from occurrence cubes
 Author-email: Maarten Trekels <maarten.trekels@plantentuinmeise.be>
 License: MIT
@@ -229,14 +229,14 @@ _, vec1 = simulation.simulate_solow_costello_scipy(time, rate, vis=True)
-The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the rate of establishment (2nd parameter).
+The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the change in rate of establishment (2nd parameter).
 ```python
-print("Fitted Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
+print("Fitted change in Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
 ```
-    Fitted Rate of Establishment from the data cube: -0.025016351861057464/year
+    Fitted change in Rate of Establishment from the data cube: -0.025016351861057464/year
 ### Step 5: Determine the error margings on the fitted rate of establishment

{b3alien-0.2.2 → b3alien-0.4.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "b3alien"
-version = "0.2.2"
+version = "0.4.0"
 description = "Calculating the CBD target 6.1 indicator from occurrence cubes"
 readme = "README.md"
 requires-python = ">=3.10"

{b3alien-0.2.2 → b3alien-0.4.0}/tests/test_solow_costello.py RENAMED Viewed

@@ -149,4 +149,76 @@ def test_SC_original():
     C1, vec = simulate_solow_costello_scipy(T, numdis)
-    assert vec[1] > 0.0134 and vec[1] < 0.0136
+    assert vec[1] > 0.0134 and vec[1] < 0.0136
+# This fake worker must be a top-level function to be pickleable by multiprocessing.
+def _fake_bootstrap_worker_for_test(i, time_list, rate_list):
+    """
+    A predictable, fake bootstrap worker that returns known values for testing.
+    """
+    n_timesteps = len(time_list)
+    # Return a fake parameter and a fake cumulative curve.
+    # The parameter is just the iteration number `i`.
+    # The curve is a simple linear array based on `i`.
+    fake_param = float(i)
+    fake_cumulative_curve = np.arange(n_timesteps) * (i + 1)
+    return fake_param, fake_cumulative_curve
+def test_run_bootstrap_analysis(monkeypatch, tiny_series):
+    """
+    Tests the aggregation and DataFrame creation logic of run_bootstrap_analysis
+    by mocking the parallel worker.
+    """
+    T, y = tiny_series
+    n_iterations = 10
+    n_timesteps = len(T)
+    # Replace the real worker with our fake one
+    monkeypatch.setattr('b3alien.simulation.simulation.bootstrap_worker', _fake_bootstrap_worker_for_test)
+    # --- Run the function under test ---
+    results_df = run_bootstrap_analysis(list(T), list(y), n_iterations=n_iterations)
+    # --- Assertions ---
+    assert isinstance(results_df, pd.DataFrame)
+    assert len(results_df) == n_timesteps
+    assert 'Annual_Rate' in results_df.columns
+    assert 'Cumulative_Value' in results_df.columns
+    assert 'Lower_CI_95' in results_df.columns
+    assert not results_df.isnull().values.any()
+def test_get_bootstrap_errors(monkeypatch, tiny_series):
+    """
+    Tests the statistics calculation in get_bootstrap_errors by mocking the
+    underlying simulation function.
+    """
+    T, y = tiny_series
+    n_iterations = 5
+    n_timesteps = len(T)
+    # --- Mock the simulation function ---
+    # This list will hold the predictable results our fake function will return.
+    fake_results = []
+    for i in range(n_iterations):
+        # Create a unique but predictable result for each iteration
+        fake_vec = np.array([0.1, 0.2, i, i, i, i])  # vec1_boot
+        fake_c1 = np.full(n_timesteps, i + 1.0)      # C1_boot
+        fake_results.append((fake_c1, fake_vec))
+    # This fake function will pop one of the pre-made results on each call.
+    def fake_simulate_scipy(annual_time, annual_rate, vis=False):
+        return fake_results.pop(0)
+    monkeypatch.setattr('b3alien.simulation.simulation.simulate_solow_costello_scipy', fake_simulate_scipy)
+    # --- Run the function under test ---
+    vec1_mean, vec1_std, C1_mean, C1_std = get_bootstrap_errors(T, y, iterations=n_iterations)
+    # --- Assertions ---
+    # Expected C1 values are [1.0, 2.0, 3.0, 4.0, 5.0] for each timestep
+    assert C1_mean.shape == (n_timesteps,)
+    np.testing.assert_allclose(C1_mean, np.full(n_timesteps, 3.0)) # Mean of 1,2,3,4,5 is 3
+    np.testing.assert_allclose(C1_std, np.full(n_timesteps, np.std([1,2,3,4,5])))