b3alien 0.2.2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {b3alien-0.2.2 → b3alien-0.4.0}/PKG-INFO +4 -4
  2. {b3alien-0.2.2 → b3alien-0.4.0}/README.md +3 -3
  3. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/simulation/__init__.py +5 -1
  4. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/simulation/simulation.py +143 -6
  5. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/utils/geo.py +1 -1
  6. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien.egg-info/PKG-INFO +4 -4
  7. {b3alien-0.2.2 → b3alien-0.4.0}/pyproject.toml +1 -1
  8. {b3alien-0.2.2 → b3alien-0.4.0}/tests/test_solow_costello.py +73 -1
  9. {b3alien-0.2.2 → b3alien-0.4.0}/LICENSE +0 -0
  10. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/__init__.py +0 -0
  11. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/b3cube/__init__.py +0 -0
  12. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/b3cube/b3cube.py +0 -0
  13. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/griis/__init__.py +0 -0
  14. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/griis/griis.py +0 -0
  15. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/utils/__init__.py +0 -0
  16. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/utils/runtime.py +0 -0
  17. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/visualisation/__init__.py +0 -0
  18. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/visualisation/b3gee.py +0 -0
  19. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien/visualisation/visualisation.py +0 -0
  20. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien.egg-info/SOURCES.txt +0 -0
  21. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien.egg-info/dependency_links.txt +0 -0
  22. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien.egg-info/requires.txt +0 -0
  23. {b3alien-0.2.2 → b3alien-0.4.0}/b3alien.egg-info/top_level.txt +0 -0
  24. {b3alien-0.2.2 → b3alien-0.4.0}/setup.cfg +0 -0
  25. {b3alien-0.2.2 → b3alien-0.4.0}/tests/test_cube.py +0 -0
  26. {b3alien-0.2.2 → b3alien-0.4.0}/tests/test_griis.py +0 -0
  27. {b3alien-0.2.2 → b3alien-0.4.0}/tests/test_occurrence_cube.py +0 -0
  28. {b3alien-0.2.2 → b3alien-0.4.0}/tests/test_simulation.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: b3alien
3
- Version: 0.2.2
3
+ Version: 0.4.0
4
4
  Summary: Calculating the CBD target 6.1 indicator from occurrence cubes
5
5
  Author-email: Maarten Trekels <maarten.trekels@plantentuinmeise.be>
6
6
  License: MIT
@@ -229,14 +229,14 @@ _, vec1 = simulation.simulate_solow_costello_scipy(time, rate, vis=True)
229
229
 
230
230
 
231
231
 
232
- The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the rate of establishment (2nd parameter).
232
+ The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the change in rate of establishment (2nd parameter).
233
233
 
234
234
 
235
235
  ```python
236
- print("Fitted Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
236
+ print("Fitted change in Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
237
237
  ```
238
238
 
239
- Fitted Rate of Establishment from the data cube: -0.025016351861057464/year
239
+ Fitted change in Rate of Establishment from the data cube: -0.025016351861057464/year
240
240
 
241
241
 
242
242
  ### Step 5: Determine the error margins on the fitted rate of establishment
@@ -193,14 +193,14 @@ _, vec1 = simulation.simulate_solow_costello_scipy(time, rate, vis=True)
193
193
 
194
194
 
195
195
 
196
- The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the rate of establishment (2nd parameter).
196
+ The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the change in rate of establishment (2nd parameter).
197
197
 
198
198
 
199
199
  ```python
200
- print("Fitted Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
200
+ print("Fitted change in Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
201
201
  ```
202
202
 
203
- Fitted Rate of Establishment from the data cube: -0.025016351861057464/year
203
+ Fitted change in Rate of Establishment from the data cube: -0.025016351861057464/year
204
204
 
205
205
 
206
206
  ### Step 5: Determine the error margins on the fitted rate of establishment
@@ -10,10 +10,14 @@ from .simulation import simulate_solow_costello
10
10
  from .simulation import simulate_solow_costello_scipy
11
11
  from .simulation import parallel_bootstrap_solow_costello
12
12
  from .simulation import plot_with_confidence
13
+ from .simulation import get_bootstrap_errors
14
+ from .simulation import run_bootstrap_analysis
13
15
 
14
16
  __all__ = [
15
17
  "simulate_solow_costello",
16
18
  "simulate_solow_costello_scipy",
17
19
  "parallel_bootstrap_solow_costello",
18
- "plot_with_confidence"
20
+ "plot_with_confidence",
21
+ "get_bootstrap_errors",
22
+ "run_bootstrap_analysis",
19
23
  ]
@@ -206,13 +206,72 @@ def simulate_solow_costello_scipy(annual_time_gbif, annual_rate_gbif, vis=False)
206
206
 
207
207
  return C1, vec1
208
208
 
209
+ import numpy as np
210
+ import pandas as pd
211
+
212
def get_bootstrap_errors(annual_time, annual_rate, iterations=100):
    """
    Estimate bootstrap means and standard errors of the Solow-Costello fit.

    Each iteration draws ``len(annual_rate)`` observations with replacement,
    orders the drawn (time, rate) pairs by time and refits the model with
    ``simulate_solow_costello_scipy``. Iterations whose fit raises an
    exception are skipped.

    Parameters
    ----------
    annual_time: pandas Series (any 1-D sequence is converted with ``pd.Series``)
        Time points
    annual_rate: pandas Series (any 1-D sequence is converted with ``pd.Series``)
        Rates corresponding to the time points
    iterations: int
        Number of bootstrap iterations to perform

    Returns
    -------
    vec1_mean
        mean of the fitted parameters across bootstrap samples
    vec1_std
        standard deviation (``np.std``, ddof=0) of the fitted parameters
        across bootstrap samples, used as the bootstrap standard error
    C1_mean:
        mean of the C1 values across bootstrap samples
    C1_std:
        standard deviation (``np.std``, ddof=0) of the C1 values across
        bootstrap samples, used as the bootstrap standard error

    Raises
    ------
    RuntimeError
        If no bootstrap iteration produced a fit, so no statistics can be
        computed.
    """
    time_series = pd.Series(annual_time)
    rate_series = pd.Series(annual_rate)
    n_obs = len(rate_series)

    all_vec1 = []
    all_C1 = []

    print(f"Starting {iterations} bootstrap iterations...")

    for _ in range(iterations):
        # 1. Resample the data with replacement.
        indices = np.random.choice(n_obs, size=n_obs, replace=True)

        # Put the draw in chronological order using ONE permutation for both
        # series. Sorting only the times would pair each time with the rate
        # of a different observation.
        order = np.argsort(time_series.iloc[indices].to_numpy(), kind="stable")
        indices = indices[order]
        resampled_time = time_series.iloc[indices]
        resampled_rate = rate_series.iloc[indices]

        # 2. Refit the model on the resampled data.
        try:
            C1_boot, vec1_boot = simulate_solow_costello_scipy(
                resampled_time, resampled_rate, vis=False
            )
        except Exception:
            # Best effort: skip iterations whose fit fails.
            continue

        all_vec1.append(vec1_boot)
        all_C1.append(C1_boot)

    if not all_vec1:
        # Averaging an empty sample would only yield NaN plus a warning.
        raise RuntimeError(
            "No bootstrap iteration produced a valid fit; cannot compute statistics."
        )

    # Convert to numpy arrays so statistics can be taken across iterations.
    all_vec1 = np.array(all_vec1)
    all_C1 = np.array(all_C1)

    # 3. Calculate means and standard errors (axis 0 = bootstrap iteration).
    vec1_mean = np.mean(all_vec1, axis=0)
    vec1_std = np.std(all_vec1, axis=0)

    C1_mean = np.mean(all_C1, axis=0)
    C1_std = np.std(all_C1, axis=0)

    return vec1_mean, vec1_std, C1_mean, C1_std
269
+
209
270
  def bootstrap_worker(i, time_list, rate_list):
210
- '''
211
- Bootstrap on the residuals
212
- Returns=
213
- - fitting parameters (vec1)
214
- - C1_sim cumulative prediction from refit
215
- '''
271
+ """
272
+ Worker function for bootstrap analysis. Each worker will perform one bootstrap iteration.
273
+ Returns the fitted parameter and cumulative curve for that iteration.
274
+ """
216
275
  time_series = pd.Series(time_list)
217
276
  rate_series = pd.Series(rate_list)
218
277
 
@@ -237,6 +296,83 @@ def bootstrap_worker(i, time_list, rate_list):
237
296
  except Exception:
238
297
  return None
239
298
 
299
+
300
def run_bootstrap_analysis(time_list, rate_list, n_iterations=200):
    """
    Fan the bootstrap out over worker processes and tabulate the outcome.

    One ``bootstrap_worker`` task is submitted per iteration. Tasks that
    return ``None`` are discarded; the remaining (parameter, cumulative
    curve) pairs are aggregated per time step.

    Parameters
    ----------
    time_list: list or pandas Series
        Time points for the analysis.
    rate_list: list or pandas Series
        Rates corresponding to the time points.
    n_iterations: int
        Number of bootstrap iterations to perform.

    Returns
    -------
    pandas DataFrame
        Columns ``Year``, ``Annual_Rate``, ``Annual_Rate_Error``,
        ``Cumulative_Value``, ``Cumulative_Error``, ``Lower_CI_95`` and
        ``Upper_CI_95``. The two CI columns are the mean annual rate
        -/+ 1.96 times its standard deviation.

    Raises
    ------
    RuntimeError
        If every iteration returned ``None``.
    """
    print(f"Starting {n_iterations} bootstrap iterations...")

    # 1. Parallel execution: each task receives the full time and rate inputs.
    with ProcessPoolExecutor() as pool:
        pending = [
            pool.submit(bootstrap_worker, idx, time_list, rate_list)
            for idx in range(n_iterations)
        ]
        outcomes = [job.result() for job in pending]

    # A None outcome marks an iteration that produced no result.
    usable = [outcome for outcome in outcomes if outcome is not None]
    if not usable:
        raise RuntimeError("All bootstrap iterations failed. No valid samples to analyze.")

    param_samples = np.array([fitted for fitted, _ in usable])
    cumulative_samples = np.array([curve for _, curve in usable])

    # 2. Recover per-step rates by differencing each cumulative curve along
    #    the time axis (the first step is differenced against 0).
    rate_samples = np.diff(cumulative_samples, axis=1, prepend=0)

    # 3./4. Per-time-step statistics across the bootstrap iterations.
    rate_mean = rate_samples.mean(axis=0)
    rate_std = rate_samples.std(axis=0)
    cum_mean = cumulative_samples.mean(axis=0)
    cum_std = cumulative_samples.std(axis=0)

    # 5. Report the fitted parameter with its spread.
    p_mean = np.mean(param_samples)
    p_std = np.std(param_samples)
    rule = "=" * 30
    print("\n" + rule)
    print("FITTING PARAMETERS RESULTS")
    print(rule)
    print(f"Parameter vec1[1]: {p_mean:.6f} ± {p_std:.6f}")
    print(rule + "\n")

    # 6. Assemble the result table.
    half_width = 1.96 * rate_std
    columns = {
        'Year': time_list,
        'Annual_Rate': rate_mean,
        'Annual_Rate_Error': rate_std,
        'Cumulative_Value': cum_mean,
        'Cumulative_Error': cum_std,
        'Lower_CI_95': rate_mean - half_width,
        'Upper_CI_95': rate_mean + half_width,
    }
    return pd.DataFrame(columns)
375
+
240
376
  def parallel_bootstrap_solow_costello(annual_time_gbif, annual_rate_gbif, n_iterations=1000, ci=95):
241
377
  """
242
378
  Perform parallel bootstrapping of the Solow-Costello model
@@ -303,6 +439,7 @@ def parallel_bootstrap_solow_costello(annual_time_gbif, annual_rate_gbif, n_iter
303
439
  "c1_all": c1_curves
304
440
  }
305
441
 
442
+
306
443
  def plot_with_confidence(T, observed, results):
307
444
  """
308
445
  Plot the observed cumulative discoveries
@@ -27,7 +27,7 @@ def to_geoparquet(csvFile, geoFile, leftID='eqdcellcode', rightID='cellCode', ex
27
27
  data = pd.read_csv(csvFile, sep='\t')
28
28
  geoRef = gpd.read_file(geoFile, engine='pyogrio', use_arrow=True, crs="EPSG:4326")
29
29
 
30
- test_merge = pd.merge(data, qdgc_ref, left_on=leftID, right_on=rightID)
30
+ test_merge = pd.merge(data, geoRef, left_on=leftID, right_on=rightID)
31
31
 
32
32
  gdf = gpd.GeoDataFrame(test_merge, geometry='geometry')
33
33
  if gdf.crs is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: b3alien
3
- Version: 0.2.2
3
+ Version: 0.4.0
4
4
  Summary: Calculating the CBD target 6.1 indicator from occurrence cubes
5
5
  Author-email: Maarten Trekels <maarten.trekels@plantentuinmeise.be>
6
6
  License: MIT
@@ -229,14 +229,14 @@ _, vec1 = simulation.simulate_solow_costello_scipy(time, rate, vis=True)
229
229
 
230
230
 
231
231
 
232
- The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the rate of establishment (2nd parameter).
232
+ The vector 'vec1' contains the parameters of the fitting of the Solow-Costello model. The most important parameter in this case is the change in rate of establishment (2nd parameter).
233
233
 
234
234
 
235
235
  ```python
236
- print("Fitted Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
236
+ print("Fitted change in Rate of Establishment from the data cube: " + str(vec1[1]) + "/year")
237
237
  ```
238
238
 
239
- Fitted Rate of Establishment from the data cube: -0.025016351861057464/year
239
+ Fitted change in Rate of Establishment from the data cube: -0.025016351861057464/year
240
240
 
241
241
 
242
242
  ### Step 5: Determine the error margins on the fitted rate of establishment
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "b3alien"
7
- version = "0.2.2"
7
+ version = "0.4.0"
8
8
  description = "Calculating the CBD target 6.1 indicator from occurrence cubes"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -149,4 +149,76 @@ def test_SC_original():
149
149
 
150
150
  C1, vec = simulate_solow_costello_scipy(T, numdis)
151
151
 
152
- assert vec[1] > 0.0134 and vec[1] < 0.0136
152
+ assert vec[1] > 0.0134 and vec[1] < 0.0136
153
+
154
+
155
+ # This fake worker must be a top-level function to be pickleable by multiprocessing.
156
+ def _fake_bootstrap_worker_for_test(i, time_list, rate_list):
157
+ """
158
+ A predictable, fake bootstrap worker that returns known values for testing.
159
+ """
160
+ n_timesteps = len(time_list)
161
+ # Return a fake parameter and a fake cumulative curve.
162
+ # The parameter is just the iteration number `i`.
163
+ # The curve is a simple linear array based on `i`.
164
+ fake_param = float(i)
165
+ fake_cumulative_curve = np.arange(n_timesteps) * (i + 1)
166
+ return fake_param, fake_cumulative_curve
167
+
168
+
169
def test_run_bootstrap_analysis(monkeypatch, tiny_series):
    """
    Tests the aggregation and DataFrame creation logic of run_bootstrap_analysis
    by mocking the parallel worker.

    The fake worker returns the curve ``arange(n) * (i + 1)`` for iteration
    ``i``, so the aggregated values can be checked exactly and not only the
    shape of the result.
    """
    T, y = tiny_series
    n_iterations = 10
    n_timesteps = len(T)

    # Replace the real worker with our fake one
    monkeypatch.setattr('b3alien.simulation.simulation.bootstrap_worker', _fake_bootstrap_worker_for_test)

    # --- Run the function under test ---
    results_df = run_bootstrap_analysis(list(T), list(y), n_iterations=n_iterations)

    # --- Structural assertions ---
    assert isinstance(results_df, pd.DataFrame)
    assert len(results_df) == n_timesteps
    assert 'Annual_Rate' in results_df.columns
    assert 'Cumulative_Value' in results_df.columns
    assert 'Lower_CI_95' in results_df.columns
    assert not results_df.isnull().values.any()

    # --- Value assertions ---
    # Slopes are 1..n_iterations, so the mean curve is arange(n) * mean(slopes).
    mean_slope = np.mean(np.arange(1, n_iterations + 1))
    expected_cumulative = np.arange(n_timesteps) * mean_slope
    # Rates are the step-to-step differences of the cumulative curve,
    # with the first step differenced against 0.
    expected_rate = np.diff(expected_cumulative, prepend=0)

    np.testing.assert_allclose(
        results_df['Cumulative_Value'].to_numpy(), expected_cumulative, atol=1e-12
    )
    np.testing.assert_allclose(
        results_df['Annual_Rate'].to_numpy(), expected_rate, atol=1e-12
    )
    assert (results_df['Lower_CI_95'] <= results_df['Upper_CI_95']).all()
191
+
192
+
193
def test_get_bootstrap_errors(monkeypatch, tiny_series):
    """
    Tests the statistics calculation in get_bootstrap_errors by mocking the
    underlying simulation function.

    Both the C1 statistics and the parameter-vector statistics are checked
    against values derived from the canned fake results.
    """
    T, y = tiny_series
    n_iterations = 5
    n_timesteps = len(T)

    # --- Mock the simulation function ---
    # Canned, predictable results: iteration i yields a parameter vector
    # [0.1, 0.2, i, i, i, i] and a constant C1 curve of value i + 1.
    fake_results = []
    for i in range(n_iterations):
        fake_vec = np.array([0.1, 0.2, i, i, i, i])  # vec1_boot
        fake_c1 = np.full(n_timesteps, i + 1.0)  # C1_boot
        fake_results.append((fake_c1, fake_vec))

    # This fake function hands out one pre-made result per call, in order.
    def fake_simulate_scipy(annual_time, annual_rate, vis=False):
        return fake_results.pop(0)

    monkeypatch.setattr('b3alien.simulation.simulation.simulate_solow_costello_scipy', fake_simulate_scipy)

    # --- Run the function under test ---
    vec1_mean, vec1_std, C1_mean, C1_std = get_bootstrap_errors(T, y, iterations=n_iterations)

    # --- Assertions on C1 ---
    # Expected C1 values are [1.0, 2.0, 3.0, 4.0, 5.0] for each timestep
    assert C1_mean.shape == (n_timesteps,)
    np.testing.assert_allclose(C1_mean, np.full(n_timesteps, 3.0))  # Mean of 1,2,3,4,5 is 3
    np.testing.assert_allclose(C1_std, np.full(n_timesteps, np.std([1, 2, 3, 4, 5])))

    # --- Assertions on the fitted parameters ---
    # The first two entries are constant; the last four take the values 0..4.
    varying_std = np.std([0, 1, 2, 3, 4])
    assert vec1_mean.shape == (6,)
    np.testing.assert_allclose(vec1_mean, [0.1, 0.2, 2.0, 2.0, 2.0, 2.0])
    # atol guards the constant entries, whose expected spread is exactly 0.
    np.testing.assert_allclose(
        vec1_std,
        [0.0, 0.0, varying_std, varying_std, varying_std, varying_std],
        atol=1e-12,
    )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes