pysephone 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysephone-0.1.0/DATA_SOURCES.md +57 -0
- pysephone-0.1.0/LICENSE +21 -0
- pysephone-0.1.0/PKG-INFO +277 -0
- pysephone-0.1.0/README.md +201 -0
- pysephone-0.1.0/pyproject.toml +83 -0
- pysephone-0.1.0/setup.cfg +4 -0
- pysephone-0.1.0/src/pysephone/__init__.py +12 -0
- pysephone-0.1.0/src/pysephone/benchmarks/__init__.py +9 -0
- pysephone-0.1.0/src/pysephone/benchmarks/bloombench/__init__.py +67 -0
- pysephone-0.1.0/src/pysephone/benchmarks/bloombench/__main__.py +9 -0
- pysephone-0.1.0/src/pysephone/benchmarks/bloombench/cli.py +182 -0
- pysephone-0.1.0/src/pysephone/benchmarks/bloombench/compare.py +140 -0
- pysephone-0.1.0/src/pysephone/benchmarks/bloombench/config.py +152 -0
- pysephone-0.1.0/src/pysephone/benchmarks/bloombench/datasets.py +211 -0
- pysephone-0.1.0/src/pysephone/benchmarks/bloombench/fit.py +720 -0
- pysephone-0.1.0/src/pysephone/benchmarks/bloombench/runner.py +239 -0
- pysephone-0.1.0/src/pysephone/constants.py +25 -0
- pysephone-0.1.0/src/pysephone/data/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/data/agera5/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/data/agera5/download.py +590 -0
- pysephone-0.1.0/src/pysephone/data/alphaearth/__init__.py +11 -0
- pysephone-0.1.0/src/pysephone/data/alphaearth/obtain_embeddings.py +773 -0
- pysephone-0.1.0/src/pysephone/data/elevation/__init__.py +13 -0
- pysephone-0.1.0/src/pysephone/data/elevation/download.py +220 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/bloom_doy.py +103 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/data/README.md +93 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/data/japan.csv +6574 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/data/kyoto.csv +834 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/data/liestal.csv +129 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/data/meteoswiss.csv +6375 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/data/south_korea.csv +995 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/data/washingtondc.csv +102 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/download.py +32 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/regions_data.py +367 -0
- pysephone-0.1.0/src/pysephone/data/gmu_cherry/source.py +115 -0
- pysephone-0.1.0/src/pysephone/data/openmeteo/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/data/openmeteo/download.py +365 -0
- pysephone-0.1.0/src/pysephone/data/pep725/README.md +4 -0
- pysephone-0.1.0/src/pysephone/data/pep725/__init__.py +1 -0
- pysephone-0.1.0/src/pysephone/data/pep725/download.py +259 -0
- pysephone-0.1.0/src/pysephone/data/pep725/metadata/README.md +12 -0
- pysephone-0.1.0/src/pysephone/data/pep725/metadata/countries.csv +20 -0
- pysephone-0.1.0/src/pysephone/data/pep725/metadata/species.csv +16 -0
- pysephone-0.1.0/src/pysephone/data/pep725/metadata/species_entries.csv +176 -0
- pysephone-0.1.0/src/pysephone/data/pep725/metadata/species_entries_old.csv +195 -0
- pysephone-0.1.0/src/pysephone/data/pep725/metadata/species_subgroups.csv +58 -0
- pysephone-0.1.0/src/pysephone/data/pep725/source.py +150 -0
- pysephone-0.1.0/src/pysephone/data/pep725/util.py +23 -0
- pysephone-0.1.0/src/pysephone/data/resources/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/data/resources/world-administrative-boundaries.geojson +1 -0
- pysephone-0.1.0/src/pysephone/data/source.py +142 -0
- pysephone-0.1.0/src/pysephone/data/usa_npn/__init__.py +1 -0
- pysephone-0.1.0/src/pysephone/data/usa_npn/download.py +372 -0
- pysephone-0.1.0/src/pysephone/data/usa_npn/source.py +175 -0
- pysephone-0.1.0/src/pysephone/data/worldclim/__init__.py +39 -0
- pysephone-0.1.0/src/pysephone/data/worldclim/download.py +505 -0
- pysephone-0.1.0/src/pysephone/dataset/__init__.py +4 -0
- pysephone-0.1.0/src/pysephone/dataset/dataset.py +482 -0
- pysephone-0.1.0/src/pysephone/dataset/observations.py +525 -0
- pysephone-0.1.0/src/pysephone/dataset/preprocessing/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/dataset/preprocessing/gmu_cherry.py +151 -0
- pysephone-0.1.0/src/pysephone/dataset/preprocessing/pep725.py +171 -0
- pysephone-0.1.0/src/pysephone/dataset/preprocessing/usa_npn.py +142 -0
- pysephone-0.1.0/src/pysephone/dataset/registry/__init__.py +37 -0
- pysephone-0.1.0/src/pysephone/dataset/registry/gmu_cherry.py +257 -0
- pysephone-0.1.0/src/pysephone/dataset/registry/pep725.py +449 -0
- pysephone-0.1.0/src/pysephone/dataset/registry/usa_npn.py +359 -0
- pysephone-0.1.0/src/pysephone/dataset/util/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/dataset/util/agera5.py +257 -0
- pysephone-0.1.0/src/pysephone/dataset/util/alphaearth.py +224 -0
- pysephone-0.1.0/src/pysephone/dataset/util/calendar.py +147 -0
- pysephone-0.1.0/src/pysephone/dataset/util/daylength.py +200 -0
- pysephone-0.1.0/src/pysephone/dataset/util/elevation.py +155 -0
- pysephone-0.1.0/src/pysephone/dataset/util/fake_weather.py +158 -0
- pysephone-0.1.0/src/pysephone/dataset/util/feature_cache.py +307 -0
- pysephone-0.1.0/src/pysephone/dataset/util/func.py +136 -0
- pysephone-0.1.0/src/pysephone/dataset/util/openmeteo.py +311 -0
- pysephone-0.1.0/src/pysephone/dataset/util/phylogeny.py +483 -0
- pysephone-0.1.0/src/pysephone/dataset/util/provider.py +53 -0
- pysephone-0.1.0/src/pysephone/dataset/util/worldclim.py +321 -0
- pysephone-0.1.0/src/pysephone/evaluation/__init__.py +14 -0
- pysephone-0.1.0/src/pysephone/evaluation/model_comparison.py +707 -0
- pysephone-0.1.0/src/pysephone/evaluation/regression.py +432 -0
- pysephone-0.1.0/src/pysephone/models/__init__.py +127 -0
- pysephone-0.1.0/src/pysephone/models/base.py +239 -0
- pysephone-0.1.0/src/pysephone/models/beta_gdd.py +1056 -0
- pysephone-0.1.0/src/pysephone/models/bspline_gdd.py +1214 -0
- pysephone-0.1.0/src/pysephone/models/cf.py +651 -0
- pysephone-0.1.0/src/pysephone/models/cnn_1d.py +264 -0
- pysephone-0.1.0/src/pysephone/models/gdd.py +261 -0
- pysephone-0.1.0/src/pysephone/models/gru.py +226 -0
- pysephone-0.1.0/src/pysephone/models/hybrid.py +313 -0
- pysephone-0.1.0/src/pysephone/models/linear_trend.py +158 -0
- pysephone-0.1.0/src/pysephone/models/lstm.py +242 -0
- pysephone-0.1.0/src/pysephone/models/lstm_ctx.py +354 -0
- pysephone-0.1.0/src/pysephone/models/mean.py +60 -0
- pysephone-0.1.0/src/pysephone/models/process_based.py +313 -0
- pysephone-0.1.0/src/pysephone/models/pvtt.py +415 -0
- pysephone-0.1.0/src/pysephone/models/random_forest.py +246 -0
- pysephone-0.1.0/src/pysephone/models/torch_base.py +675 -0
- pysephone-0.1.0/src/pysephone/models/transformer.py +303 -0
- pysephone-0.1.0/src/pysephone/models/unimodal_hybrid.py +265 -0
- pysephone-0.1.0/src/pysephone/models/util/causal_cnn.py +23 -0
- pysephone-0.1.0/src/pysephone/models/util/dataset_torch.py +12 -0
- pysephone-0.1.0/src/pysephone/models/util/early_stopping.py +164 -0
- pysephone-0.1.0/src/pysephone/models/util/flat_features.py +47 -0
- pysephone-0.1.0/src/pysephone/models/util/func_phenology.py +312 -0
- pysephone-0.1.0/src/pysephone/models/util/func_phenology_torch.py +105 -0
- pysephone-0.1.0/src/pysephone/models/util/monotone_cnn.py +49 -0
- pysephone-0.1.0/src/pysephone/models/util/onehot_encoders.py +339 -0
- pysephone-0.1.0/src/pysephone/models/util/pointwise_head.py +58 -0
- pysephone-0.1.0/src/pysephone/models/util/soft_threshold.py +79 -0
- pysephone-0.1.0/src/pysephone/models/util/ttcnn.py +65 -0
- pysephone-0.1.0/src/pysephone/models/wheat_hybrid.py +288 -0
- pysephone-0.1.0/src/pysephone/models/xgb.py +258 -0
- pysephone-0.1.0/src/pysephone/paths.py +141 -0
- pysephone-0.1.0/src/pysephone/run/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/run/args_util/__init__.py +2 -0
- pysephone-0.1.0/src/pysephone/run/args_util/dataset_init.py +338 -0
- pysephone-0.1.0/src/pysephone/run/args_util/eval.py +57 -0
- pysephone-0.1.0/src/pysephone/run/args_util/model.py +57 -0
- pysephone-0.1.0/src/pysephone/run/fit_eval.py +128 -0
- pysephone-0.1.0/src/pysephone/run/readme_figures.py +570 -0
- pysephone-0.1.0/src/pysephone/run/visualize_datasets.py +174 -0
- pysephone-0.1.0/src/pysephone/utils/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/utils/func.py +11 -0
- pysephone-0.1.0/src/pysephone/utils/func_torch.py +38 -0
- pysephone-0.1.0/src/pysephone/utils/species_encoder.py +103 -0
- pysephone-0.1.0/src/pysephone/visualize/__init__.py +0 -0
- pysephone-0.1.0/src/pysephone/visualize/dataset.py +582 -0
- pysephone-0.1.0/src/pysephone.egg-info/PKG-INFO +277 -0
- pysephone-0.1.0/src/pysephone.egg-info/SOURCES.txt +144 -0
- pysephone-0.1.0/src/pysephone.egg-info/dependency_links.txt +1 -0
- pysephone-0.1.0/src/pysephone.egg-info/requires.txt +43 -0
- pysephone-0.1.0/src/pysephone.egg-info/top_level.txt +1 -0
- pysephone-0.1.0/tests/test_agera5.py +263 -0
- pysephone-0.1.0/tests/test_bloombench.py +386 -0
- pysephone-0.1.0/tests/test_dataset.py +723 -0
- pysephone-0.1.0/tests/test_daylength.py +297 -0
- pysephone-0.1.0/tests/test_evaluation.py +254 -0
- pysephone-0.1.0/tests/test_gdd.py +191 -0
- pysephone-0.1.0/tests/test_pep725_source.py +125 -0
- pysephone-0.1.0/tests/test_phylogeny.py +338 -0
- pysephone-0.1.0/tests/test_torch_base.py +435 -0
- pysephone-0.1.0/tests/test_usa_npn_source.py +219 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Data sources, attribution & licenses
|
|
2
|
+
|
|
3
|
+
The **pysephone source code is licensed under the MIT License** (see `LICENSE`).
|
|
4
|
+
|
|
5
|
+
This package also **bundles a number of third-party reference datasets** under
|
|
6
|
+
`src/pysephone/data/`. Those datasets are **not** covered by the MIT license —
|
|
7
|
+
each retains the terms of its original source, as documented below. If you use,
|
|
8
|
+
redistribute, or build upon pysephone, you are responsible for honoring the terms
|
|
9
|
+
of any bundled dataset you rely on, including any attribution and use
|
|
10
|
+
restrictions noted here.
|
|
11
|
+
|
|
12
|
+
> ⚠️ **Note on commercial use.** Most bundled datasets permit reuse with
|
|
13
|
+
> attribution, but **`liestal.csv` is licensed for non-commercial use only**, and
|
|
14
|
+
> **`kyoto.csv`** is an academic dataset provided for research use with required
|
|
15
|
+
> citations and **no explicit redistribution grant**. These two files are
|
|
16
|
+
> redistributed here on those terms; commercial users in particular must review
|
|
17
|
+
> them before relying on these files.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## GMU cherry blossom data — `src/pysephone/data/gmu_cherry/data/`
|
|
22
|
+
|
|
23
|
+
Cleaned peak-bloom / first-flowering records (schema:
|
|
24
|
+
`location, lat, long, alt, year, bloom_date, bloom_doy`). Per-file provenance:
|
|
25
|
+
|
|
26
|
+
| File | Source | License / terms | Attribution / citation |
|
|
27
|
+
|---|---|---|---|
|
|
28
|
+
| `washingtondc.csv` | US EPA, Climate Change Indicators — Cherry Blossoms | US federal source (public domain); see source for details | "Source: U.S. EPA, Climate Change Indicators in the United States — https://www.epa.gov/climate-indicators/cherry-blossoms" |
|
|
29
|
+
| `meteoswiss.csv` | MeteoSwiss / opendata.swiss (phenological observations) | **Open use, incl. commercial**; must provide source | "Source: MeteoSwiss" |
|
|
30
|
+
| `japan.csv` | Japan Meteorological Agency | Cite source | "Source: Japan Meteorological Agency — https://www.data.jma.go.jp/sakura/data/pdf/005.pdf" |
|
|
31
|
+
| `south_korea.csv` | Korea Meteorological Administration | Cite source | "Source: Korea Meteorological Administration" |
|
|
32
|
+
| `liestal.csv` | Landwirtschaftliches Zentrum Ebenrain, Sissach & MeteoSwiss | **Non-commercial use only**; must provide source | "Source: Landwirtschaftliches Zentrum Ebenrain, Sissach and MeteoSwiss" |
|
|
33
|
+
| `kyoto.csv` | Yasuyuki Aono, Osaka Prefecture University | Academic / research use; **no explicit redistribution grant**; cite the papers below | Aono & Saito (2010), *Int. J. Biometeorology* 54:211–219; Aono & Kazui (2008), *Int. J. Climatology* 28:905–914. Source: http://atmenv.envi.osakafu-u.ac.jp/aono/kyophenotemp4/ |
|
|
34
|
+
|
|
35
|
+
See `src/pysephone/data/gmu_cherry/data/README.md` for the full per-source notes.
|
|
36
|
+
This collection originates from George Mason University's public
|
|
37
|
+
`peak-bloom-prediction` competition repository.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## PEP725 metadata — `src/pysephone/data/pep725/metadata/`
|
|
42
|
+
|
|
43
|
+
Only **lookup/metadata tables** (PEP725 species codes, country codes, and the
|
|
44
|
+
catalog of entries to download) are bundled here. The actual PEP725 phenology
|
|
45
|
+
**observations are not redistributed** — they are downloaded at runtime using
|
|
46
|
+
your own PEP725 account credentials (see `src/pysephone/data/pep725/README.md`).
|
|
47
|
+
PEP725 data is subject to the PEP725 data policy (https://www.pep725.eu/).
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## World administrative boundaries — `src/pysephone/data/resources/`
|
|
52
|
+
|
|
53
|
+
`world-administrative-boundaries.geojson` — obtained from OpenDataSoft
|
|
54
|
+
(https://public.opendatasoft.com/explore/dataset/world-administrative-boundaries/),
|
|
55
|
+
derived from Natural Earth (public domain) and related sources. Used only for
|
|
56
|
+
optional map backgrounds in visualizations. Please review OpenDataSoft's terms
|
|
57
|
+
for the dataset before redistribution.
|
pysephone-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ron van Bree
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pysephone-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pysephone
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python library for using machine learning to predict the timing of phenological events in plants
|
|
5
|
+
Author-email: Ron van Bree <ronvbree@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Ron van Bree
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/ronvree/pysephone
|
|
29
|
+
Project-URL: Repository, https://github.com/ronvree/pysephone
|
|
30
|
+
Keywords: phenology,machine-learning,bloom,agriculture,remote-sensing
|
|
31
|
+
Classifier: Development Status :: 3 - Alpha
|
|
32
|
+
Classifier: Intended Audience :: Science/Research
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: Topic :: Scientific/Engineering
|
|
36
|
+
Requires-Python: >=3.14
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
License-File: LICENSE
|
|
39
|
+
License-File: DATA_SOURCES.md
|
|
40
|
+
Requires-Dist: pandas
|
|
41
|
+
Requires-Dist: numpy>=1.23
|
|
42
|
+
Requires-Dist: scipy
|
|
43
|
+
Requires-Dist: scikit-learn
|
|
44
|
+
Requires-Dist: matplotlib
|
|
45
|
+
Requires-Dist: tqdm
|
|
46
|
+
Requires-Dist: requests
|
|
47
|
+
Requires-Dist: requests-cache
|
|
48
|
+
Requires-Dist: retry-requests
|
|
49
|
+
Requires-Dist: unidecode
|
|
50
|
+
Requires-Dist: nlopt
|
|
51
|
+
Requires-Dist: tables
|
|
52
|
+
Requires-Dist: h5py
|
|
53
|
+
Requires-Dist: platformdirs
|
|
54
|
+
Provides-Extra: deep
|
|
55
|
+
Requires-Dist: torch>=2.0; extra == "deep"
|
|
56
|
+
Provides-Extra: boost
|
|
57
|
+
Requires-Dist: xgboost>=1.7; extra == "boost"
|
|
58
|
+
Provides-Extra: agera5
|
|
59
|
+
Requires-Dist: cdsapi; extra == "agera5"
|
|
60
|
+
Requires-Dist: xarray; extra == "agera5"
|
|
61
|
+
Requires-Dist: netCDF4; extra == "agera5"
|
|
62
|
+
Provides-Extra: openmeteo
|
|
63
|
+
Requires-Dist: openmeteo-requests; extra == "openmeteo"
|
|
64
|
+
Provides-Extra: geo
|
|
65
|
+
Requires-Dist: geopandas; extra == "geo"
|
|
66
|
+
Requires-Dist: shapely; extra == "geo"
|
|
67
|
+
Requires-Dist: rasterio; extra == "geo"
|
|
68
|
+
Provides-Extra: earthengine
|
|
69
|
+
Requires-Dist: earthengine-api; extra == "earthengine"
|
|
70
|
+
Provides-Extra: stats
|
|
71
|
+
Requires-Dist: scikit-posthocs; extra == "stats"
|
|
72
|
+
Requires-Dist: autorank; extra == "stats"
|
|
73
|
+
Provides-Extra: all
|
|
74
|
+
Requires-Dist: pysephone[agera5,boost,deep,earthengine,geo,openmeteo,stats]; extra == "all"
|
|
75
|
+
Dynamic: license-file
|
|
76
|
+
|
|
77
|
+
<div align="center">
|
|
78
|
+
|
|
79
|
+
# pysephone
|
|
80
|
+
|
|
81
|
+

|
|
82
|
+
[License: MIT](LICENSE)
|
|
83
|
+
|
|
84
|
+
</div>
|
|
85
|
+
<br>
|
|
86
|
+
|
|
87
|
+
## Description
|
|
88
|
+
|
|
89
|
+
**pysephone** is a Python package for developing and benchmarking crop phenology models — models that predict the timing of key developmental events in plants, such as flowering, leaf-out, or harvest maturity. Accurate phenology predictions are essential for agricultural planning, yield forecasting, and understanding how ecosystems respond to climate variability and long-term change. As growing seasons shift under climate change, the ability to reliably model phenological timing across species and regions becomes increasingly important for both science and policy.
|
|
90
|
+
|
|
91
|
+
pysephone provides a standardised pipeline that connects observational phenology databases with meteorological drivers, and a suite of models ranging from classical process-based approaches to deep learning, all sharing a common interface.
|
|
92
|
+
|
|
93
|
+
The package is designed to make it straightforward to:
|
|
94
|
+
- load and preprocess phenological observation data from multiple sources,
|
|
95
|
+
- pair observations with season-windowed meteorological time series (AgERA5 and Open-Meteo ERA5 are supported via the `agera5` and `openmeteo` extras; other drivers can be integrated),
|
|
96
|
+
- define phenology datasets for standardised intercomparison of models,
|
|
97
|
+
- fit and evaluate a variety of models, and
|
|
98
|
+
- systematically compare model behaviour across species, regions, and climate conditions.
|
|
99
|
+
|
|
100
|
+
<br>
|
|
101
|
+
|
|
102
|
+
## Installation
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
git clone https://github.com/ronvree/pysephone.git
|
|
106
|
+
cd pysephone
|
|
107
|
+
pip install -e .
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Requires Python ≥ 3.14.
|
|
111
|
+
|
|
112
|
+
The base install is intentionally lightweight (process-based + scikit-learn models, datasets, evaluation). Heavier and source-specific dependencies are opt-in via extras:
|
|
113
|
+
|
|
114
|
+
| Extra | Adds | Needed for |
|
|
115
|
+
|---|---|---|
|
|
116
|
+
| `deep` | PyTorch | LSTM / GRU / CNN / Transformer / hybrid / Beta-GDD / BSpline-GDD models |
|
|
117
|
+
| `boost` | XGBoost | `XGBoostModel` |
|
|
118
|
+
| `agera5` | cdsapi, xarray, netCDF4 | Downloading AgERA5 from Copernicus CDS |
|
|
119
|
+
| `openmeteo` | openmeteo-requests | Downloading Open-Meteo ERA5 |
|
|
120
|
+
| `geo` | geopandas, shapely, rasterio | Map visualizations, WorldClim rasters |
|
|
121
|
+
| `earthengine` | earthengine-api | Fetching AlphaEarth embeddings |
|
|
122
|
+
| `stats` | scikit-posthocs, autorank | Friedman/Nemenyi comparison + critical-difference plots |
|
|
123
|
+
| `all` | everything above | Convenience meta-extra (every model + data source) |
|
|
124
|
+
|
|
125
|
+
Install one or more with e.g. `pip install "pysephone[deep]"` or `pip install "pysephone[deep,agera5]"`. Accessing a model whose extra isn't installed raises a clear error telling you which extra to add.
|
|
126
|
+
|
|
127
|
+
Reproducing **BloomBench** specifically needs `pip install "pysephone[deep,boost,agera5,stats]"` — its models are CNN/LSTM/Transformer (`deep`), XGBoost (`boost`), RandomForest/Mean/Linear (base); climate features come from AgERA5 (`agera5`); and the `compare` step's Nemenyi/critical-difference plots need `stats`. It does **not** use AlphaEarth/Earth Engine or Open-Meteo.
|
|
128
|
+
|
|
129
|
+
<br>
|
|
130
|
+
|
|
131
|
+
## Data Sources
|
|
132
|
+
|
|
133
|
+
| Source | Description |
|
|
134
|
+
|---|---|
|
|
135
|
+
| **PEP725** | Pan-European Phenology Database — multi-species observations across Europe |
|
|
136
|
+
| **GMU Cherry Blossom** | Cherry blossom bloom dates from Japan, Switzerland, South Korea, and the United States (Washington, D.C.) |
|
|
137
|
+
| **USA-NPN** | USA National Phenology Network — deciduous fruit-tree observations |
|
|
138
|
+
| **AgERA5** | Daily agrometeorological indicators from Copernicus CDS (downscaled temperature/radiation, Penman–Monteith inputs, etc.) |
|
|
139
|
+
| **Open-Meteo ERA5** | ERA5 reanalysis via the Open-Meteo archive |
|
|
140
|
+
|
|
141
|
+
Meteorological data is cached locally in HDF5 for fast repeated access. Additional providers can be integrated by implementing the `FeatureProvider` interface.
|
|
142
|
+
|
|
143
|
+
Some reference datasets (e.g. the cherry-blossom bloom records) are bundled with the package. These third-party datasets retain their original licenses and attribution requirements — see [DATA_SOURCES.md](DATA_SOURCES.md). Note that `liestal.csv` is non-commercial-use only and `kyoto.csv` is provided for academic use with required citations.
|
|
144
|
+
|
|
145
|
+
<br>
|
|
146
|
+
|
|
147
|
+
## Authentication & configuration
|
|
148
|
+
|
|
149
|
+
Some data sources reach external APIs that require **your own** account/project — pysephone ships no credentials and no default project. Set these up once before running the download steps:
|
|
150
|
+
|
|
151
|
+
**Copernicus CDS (AgERA5).** Authentication uses the [`cdsapi`](https://cds.climate.copernicus.eu/how-to-api) convention — pysephone handles no keys itself. Provide your credentials via either:
|
|
152
|
+
- a `~/.cdsapirc` file, or
|
|
153
|
+
- the `CDSAPI_URL` and `CDSAPI_KEY` environment variables.
|
|
154
|
+
|
|
155
|
+
**Google Earth Engine (AlphaEarth embeddings).** Requires a Google Cloud project with the Earth Engine API enabled:
|
|
156
|
+
1. Authenticate once: `python -c "import ee; ee.Authenticate()"`.
|
|
157
|
+
2. Tell pysephone which project to use, in priority order:
|
|
158
|
+
- pass `ee_project="your-gcp-project"` to `fetch_alphaearth_embeddings_batched(...)`, or
|
|
159
|
+
- set the `PYSEPHONE_EE_PROJECT` environment variable (Earth Engine's native `EARTHENGINE_PROJECT` is also honored), or
|
|
160
|
+
- leave it unset to let Earth Engine resolve its own default project.
|
|
161
|
+
|
|
162
|
+
**Data location.** All caches, downloaded data, and outputs are written under a single data root. By default this is an OS-native per-user directory (`%LOCALAPPDATA%\pysephone` on Windows, `~/.local/share/pysephone` on Linux/macOS). Override it with the `PYSEPHONE_DATA_ROOT` environment variable — e.g. set `PYSEPHONE_DATA_ROOT=<repo>` to keep data inside a source checkout during development.
|
|
163
|
+
|
|
164
|
+
<br>
|
|
165
|
+
|
|
166
|
+
## Models
|
|
167
|
+
|
|
168
|
+
| Category | Models |
|
|
169
|
+
|---|---|
|
|
170
|
+
| Baseline | Mean, Linear trend |
|
|
171
|
+
| Process-based | GDD, Utah+GDD, ChillingDays+GDD, Dynamic+GDD |
|
|
172
|
+
| Machine learning | Random Forest, XGBoost (requires the `boost` extra) |
|
|
173
|
+
| Deep learning (requires the `deep` extra) | LSTM, GRU, CNN, Transformer, Hybrid (TTCNN chilling + GDD forcing) |
|
|
174
|
+
|
|
175
|
+
All models share a common `fit` / `predict` interface, making it easy to add new models or swap them in evaluation pipelines.
|
|
176
|
+
|
|
177
|
+
<br>
|
|
178
|
+
|
|
179
|
+
## Pipeline Overview
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
Data source (PEP725 / GMU Cherry / USA-NPN)
|
|
183
|
+
↓ preprocessing (outlier removal, grid aggregation)
|
|
184
|
+
Observations (indexed by source, location, year, species, obs type)
|
|
185
|
+
↓ paired with Calendar + meteorological feature provider
|
|
186
|
+
Dataset (yields season-windowed feature arrays per sample)
|
|
187
|
+
↓
|
|
188
|
+
Model.fit(target_fn, dataset) → Model.predict(sample)
|
|
189
|
+
↓
|
|
190
|
+
SingleTargetRegression.run(...) → metrics, error DataFrames, plots
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
The `Calendar` defines the season window (start date + length) for each entry. Feature providers retrieve the corresponding meteorological time series for each sample.
|
|
194
|
+
|
|
195
|
+
<br>
|
|
196
|
+
|
|
197
|
+
## Reproducing BloomBench
|
|
198
|
+
|
|
199
|
+
[BloomBench](https://github.com/WUR-AI/BloomBench) is a multi-species benchmark for evaluating ML phenology models on fruit-tree flowering. The benchmark is shipped as a first-class library module: [`pysephone.benchmarks.bloombench`](src/pysephone/benchmarks/bloombench/).
|
|
200
|
+
|
|
201
|
+
It exposes both a Python API and a thin CLI. Reproducing the benchmark needs the deep-learning, boosting, AgERA5, and stats extras (it does **not** use Earth Engine or Open-Meteo):
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
pip install "pysephone[deep,boost,agera5,stats]"
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
# 1. Populate the AgERA5 cache once (Copernicus CDS credentials required).
|
|
209
|
+
jupyter nbconvert --execute notebooks/download_agera5.ipynb
|
|
210
|
+
|
|
211
|
+
# 2. Tune hyperparameters per (dataset, model) — overnight run.
|
|
212
|
+
python -m pysephone.benchmarks.bloombench hpo
|
|
213
|
+
|
|
214
|
+
# 3. Fit & evaluate every (seed, dataset, model) triple.
|
|
215
|
+
python -m pysephone.benchmarks.bloombench run --seeds 0 1 2
|
|
216
|
+
|
|
217
|
+
# 4. Friedman + Nemenyi + critical-difference plots.
|
|
218
|
+
python -m pysephone.benchmarks.bloombench compare --seeds 0 1 2
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
The same flow as Python:
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
from pysephone.benchmarks.bloombench import (
|
|
225
|
+
load_bloombench_datasets, run_benchmark, run_comparison, run_hpo,
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
datasets, _ = load_bloombench_datasets()
|
|
229
|
+
run_hpo(datasets) # one-time HPO
|
|
230
|
+
results = run_benchmark(seeds=[0, 1, 2], datasets_dict=datasets)
|
|
231
|
+
report = run_comparison(seeds=[0, 1, 2])
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
For the interactive flow with tables / heatmaps / critical-difference plots, see [`notebooks/bloombench_extended_hpo.ipynb`](notebooks/bloombench_extended_hpo.ipynb) (one-time HPO) and [`notebooks/bloombench_extended.ipynb`](notebooks/bloombench_extended.ipynb) (replication).
|
|
235
|
+
|
|
236
|
+
<br>
|
|
237
|
+
|
|
238
|
+
## Project Structure
|
|
239
|
+
|
|
240
|
+
```
|
|
241
|
+
.
|
|
242
|
+
├── src/pysephone/
|
|
243
|
+
│ ├── benchmarks/ # End-to-end benchmark suites (BloomBench, …)
|
|
244
|
+
│ ├── data/ # Data ingestion and sources (PEP725, GMU Cherry, USA-NPN, AgERA5)
|
|
245
|
+
│ ├── dataset/ # Observations, Dataset, Calendar, feature providers, registry
|
|
246
|
+
│ ├── evaluation/ # Evaluation logic and regression metrics
|
|
247
|
+
│ ├── models/ # Model implementations (CF, RF, LSTM, Hybrid, …)
|
|
248
|
+
│ ├── utils/ # Shared utilities
|
|
249
|
+
│ └── visualize/ # Visualisation helpers
|
|
250
|
+
├── notebooks/ # Jupyter notebooks for exploration and analysis
|
|
251
|
+
├── scripts/ # Standalone scripts
|
|
252
|
+
├── tests/ # Test suite
|
|
253
|
+
└── data/ # Raw and processed data (git-ignored)
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
<br>
|
|
257
|
+
|
|
258
|
+
## Notebooks
|
|
259
|
+
|
|
260
|
+
| Notebook | Description |
|
|
261
|
+
|---|---|
|
|
262
|
+
| `cherry_blossom_cf_models.ipynb` | Process-based model evaluation on GMU Cherry datasets |
|
|
263
|
+
| `cf_models_pep725_fruit_trees.ipynb` | CF model evaluation across PEP725 fruit tree species |
|
|
264
|
+
| `unusual_year_model_eval.ipynb` | Model comparison on climatologically unusual vs normal years |
|
|
265
|
+
| `unusual_seasons_*.ipynb` | Exploration of unusual seasons in GMU / PEP725 data |
|
|
266
|
+
| `dataset_adequacy_*.ipynb` | Sample sufficiency analysis per dataset |
|
|
267
|
+
| `lstm_cherry_exploration.ipynb` | LSTM model exploration on cherry blossom data |
|
|
268
|
+
| `model_exploration.ipynb` | General model exploration notebook |
|
|
269
|
+
| `pvtt_winter_wheat.ipynb` | PVTT model for winter wheat phenology |
|
|
270
|
+
|
|
271
|
+
<br>
|
|
272
|
+
|
|
273
|
+
## Dependencies
|
|
274
|
+
|
|
275
|
+
Base install: `pandas`, `numpy`, `scipy`, `scikit-learn`, `matplotlib`, `nlopt`, `tables`, `h5py`, `requests`, `requests-cache`, `retry-requests`, `tqdm`, `unidecode`, `platformdirs`.
|
|
276
|
+
|
|
277
|
+
Heavier and source-specific dependencies (`torch`, `xgboost`, `cdsapi`, `openmeteo-requests`, `geopandas`/`shapely`/`rasterio`, `earthengine-api`, …) are opt-in via the [extras](#installation) above.
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# pysephone
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+
[](LICENSE)
|
|
7
|
+
|
|
8
|
+
</div>
|
|
9
|
+
<br>
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
**pysephone** is a Python package for developing and benchmarking crop phenology models — models that predict the timing of key developmental events in plants, such as flowering, leaf-out, or harvest maturity. Accurate phenology predictions are essential for agricultural planning, yield forecasting, and understanding how ecosystems respond to climate variability and long-term change. As growing seasons shift under climate change, the ability to reliably model phenological timing across species and regions becomes increasingly important for both science and policy.
|
|
14
|
+
|
|
15
|
+
pysephone provides a standardised pipeline that connects observational phenology databases with meteorological drivers, and a suite of models ranging from classical process-based approaches to deep learning, all sharing a common interface.
|
|
16
|
+
|
|
17
|
+
The package is designed to make it straightforward to:
|
|
18
|
+
- load and preprocess phenological observation data from multiple sources,
|
|
19
|
+
- pair observations with season-windowed meteorological time series (AgERA5 and Open-Meteo ERA5 are supported via the `agera5` / `openmeteo` extras; other drivers can be integrated),
|
|
20
|
+
- define phenology datasets for standardised intercomparison of models,
|
|
21
|
+
- fit and evaluate a variety of models, and
|
|
22
|
+
- systematically compare model behaviour across species, regions, and climate conditions.
|
|
23
|
+
|
|
24
|
+
<br>
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
git clone https://github.com/ronvree/pysephone.git
|
|
30
|
+
cd pysephone
|
|
31
|
+
pip install -e .
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Requires Python ≥ 3.14.
|
|
35
|
+
|
|
36
|
+
The base install is intentionally lightweight (process-based + scikit-learn models, datasets, evaluation). Heavier and source-specific dependencies are opt-in via extras:
|
|
37
|
+
|
|
38
|
+
| Extra | Adds | Needed for |
|
|
39
|
+
|---|---|---|
|
|
40
|
+
| `deep` | PyTorch | LSTM / GRU / CNN / Transformer / hybrid / Beta-GDD / BSpline-GDD models |
|
|
41
|
+
| `boost` | XGBoost | `XGBoostModel` |
|
|
42
|
+
| `agera5` | cdsapi, xarray, netCDF4 | Downloading AgERA5 from Copernicus CDS |
|
|
43
|
+
| `openmeteo` | openmeteo-requests | Downloading Open-Meteo ERA5 |
|
|
44
|
+
| `geo` | geopandas, shapely, rasterio | Map visualizations, WorldClim rasters |
|
|
45
|
+
| `earthengine` | earthengine-api | Fetching AlphaEarth embeddings |
|
|
46
|
+
| `stats` | scikit-posthocs, autorank | Friedman/Nemenyi comparison + critical-difference plots |
|
|
47
|
+
| `all` | everything above | Convenience meta-extra (every model + data source) |
|
|
48
|
+
|
|
49
|
+
Install one or more with e.g. `pip install "pysephone[deep]"` or `pip install "pysephone[deep,agera5]"` (from a source checkout, use the editable form: `pip install -e ".[deep,agera5]"`). Accessing a model whose extra isn't installed raises a clear error telling you which extra to add.
|
|
50
|
+
|
|
51
|
+
Reproducing **BloomBench** specifically needs `pip install "pysephone[deep,boost,agera5,stats]"` — its models are CNN/LSTM/Transformer (`deep`), XGBoost (`boost`), RandomForest/Mean/Linear (base); climate features come from AgERA5 (`agera5`); and the `compare` step's Nemenyi/critical-difference plots need `stats`. It does **not** use AlphaEarth/Earth Engine or Open-Meteo.
|
|
52
|
+
|
|
53
|
+
<br>
|
|
54
|
+
|
|
55
|
+
## Data Sources
|
|
56
|
+
|
|
57
|
+
| Source | Description |
|
|
58
|
+
|---|---|
|
|
59
|
+
| **PEP725** | Pan-European Phenology Database — multi-species observations across Europe |
|
|
60
|
+
| **GMU Cherry Blossom** | Cherry blossom bloom dates from Japan, Switzerland, and South Korea |
|
|
61
|
+
| **USA-NPN** | USA National Phenology Network — deciduous fruit-tree observations |
|
|
62
|
+
| **AgERA5** | Daily agrometeorological indicators from Copernicus CDS (downscaled temperature/radiation, Penman–Monteith inputs, etc.) |
|
|
63
|
+
| **Open-Meteo ERA5** | ERA5 reanalysis via the Open-Meteo archive |
|
|
64
|
+
|
|
65
|
+
Meteorological data is cached locally in HDF5 for fast repeated access. Additional providers can be integrated by implementing the `FeatureProvider` interface.
|
|
66
|
+
|
|
67
|
+
Some reference datasets (e.g. the cherry-blossom bloom records) are bundled with the package. These third-party datasets retain their original licenses and attribution requirements — see [DATA_SOURCES.md](DATA_SOURCES.md). Note that `liestal.csv` is non-commercial-use only and `kyoto.csv` is provided for academic use with required citations.
|
|
68
|
+
|
|
69
|
+
<br>
|
|
70
|
+
|
|
71
|
+
## Authentication & Configuration
|
|
72
|
+
|
|
73
|
+
Some data sources reach external APIs that require **your own** account/project — pysephone ships no credentials and no default project. Set these up once before running the download steps:
|
|
74
|
+
|
|
75
|
+
**Copernicus CDS (AgERA5).** Authentication uses the [`cdsapi`](https://cds.climate.copernicus.eu/how-to-api) convention — pysephone handles no keys itself. Provide your credentials via either:
|
|
76
|
+
- a `~/.cdsapirc` file, or
|
|
77
|
+
- the `CDSAPI_URL` and `CDSAPI_KEY` environment variables.
|
|
78
|
+
|
|
79
|
+
**Google Earth Engine (AlphaEarth embeddings).** Requires a Google Cloud project with the Earth Engine API enabled:
|
|
80
|
+
1. Authenticate once: `python -c "import ee; ee.Authenticate()"`.
|
|
81
|
+
2. Tell pysephone which project to use, in priority order:
|
|
82
|
+
- pass `ee_project="your-gcp-project"` to `fetch_alphaearth_embeddings_batched(...)`, or
|
|
83
|
+
- set the `PYSEPHONE_EE_PROJECT` environment variable (Earth Engine's native `EARTHENGINE_PROJECT` is also honored), or
|
|
84
|
+
- leave it unset to let Earth Engine resolve its own default project.
|
|
85
|
+
|
|
86
|
+
**Data location.** All caches, downloaded data, and outputs are written under a single data root. By default this is an OS-native per-user directory (`%LOCALAPPDATA%\pysephone` on Windows, `~/.local/share/pysephone` on Linux/macOS). Override it with the `PYSEPHONE_DATA_ROOT` environment variable — e.g. set `PYSEPHONE_DATA_ROOT=<repo>` to keep data inside a source checkout during development.
|
|
87
|
+
|
|
88
|
+
<br>
|
|
89
|
+
|
|
90
|
+
## Models
|
|
91
|
+
|
|
92
|
+
| Category | Models |
|
|
93
|
+
|---|---|
|
|
94
|
+
| Baseline | Mean |
|
|
95
|
+
| Process-based | GDD, Utah+GDD, ChillingDays+GDD, Dynamic+GDD |
|
|
96
|
+
| Machine learning | Linear, Random Forest, XGBoost (`boost` extra) |
|
|
97
|
+
| Deep learning (`deep` extra) | LSTM, GRU, CNN, Transformer, Hybrid (TTCNN chilling + GDD forcing) |
|
|
98
|
+
|
|
99
|
+
All models share a common `fit` / `predict` interface, making it easy to add new models or swap them in evaluation pipelines.
|
|
100
|
+
|
|
101
|
+
<br>
|
|
102
|
+
|
|
103
|
+
## Pipeline Overview
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
Data source (PEP725 / GMU Cherry / USA-NPN)
|
|
107
|
+
↓ preprocessing (outlier removal, grid aggregation)
|
|
108
|
+
Observations (indexed by source, location, year, species, obs type)
|
|
109
|
+
↓ paired with Calendar + meteorological feature provider
|
|
110
|
+
Dataset (yields season-windowed feature arrays per sample)
|
|
111
|
+
↓
|
|
112
|
+
Model.fit(target_fn, dataset) → Model.predict(sample)
|
|
113
|
+
↓
|
|
114
|
+
SingleTargetRegression.run(...) → metrics, error DataFrames, plots
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
The `Calendar` defines the season window (start date + length) for each entry. Feature providers retrieve the corresponding meteorological time series for each sample.
|
|
118
|
+
|
|
119
|
+
<br>
|
|
120
|
+
|
|
121
|
+
## Reproducing BloomBench
|
|
122
|
+
|
|
123
|
+
[BloomBench](https://github.com/WUR-AI/BloomBench) is a multi-species benchmark for evaluating ML phenology models on fruit-tree flowering. The benchmark is shipped as a first-class library module: [`pysephone.benchmarks.bloombench`](src/pysephone/benchmarks/bloombench/).
|
|
124
|
+
|
|
125
|
+
It exposes both a Python API and a thin CLI. Reproducing the benchmark needs the deep-learning, boosting, AgERA5, and stats extras (it does **not** use Earth Engine or Open-Meteo):
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
pip install "pysephone[deep,boost,agera5,stats]"
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# 1. Populate the AgERA5 cache once (Copernicus CDS credentials required).
|
|
133
|
+
jupyter nbconvert --execute notebooks/download_agera5.ipynb
|
|
134
|
+
|
|
135
|
+
# 2. Tune hyperparameters per (dataset, model) — overnight run.
|
|
136
|
+
python -m pysephone.benchmarks.bloombench hpo
|
|
137
|
+
|
|
138
|
+
# 3. Fit & evaluate every (seed, dataset, model) triple.
|
|
139
|
+
python -m pysephone.benchmarks.bloombench run --seeds 0 1 2
|
|
140
|
+
|
|
141
|
+
# 4. Friedman + Nemenyi + critical-difference plots.
|
|
142
|
+
python -m pysephone.benchmarks.bloombench compare --seeds 0 1 2
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
The same flow as Python:
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
from pysephone.benchmarks.bloombench import (
|
|
149
|
+
load_bloombench_datasets, run_benchmark, run_comparison, run_hpo,
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
datasets, _ = load_bloombench_datasets()
|
|
153
|
+
run_hpo(datasets) # one-time HPO
|
|
154
|
+
results = run_benchmark(seeds=[0, 1, 2], datasets_dict=datasets)
|
|
155
|
+
report = run_comparison(seeds=[0, 1, 2])
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
For the interactive flow with tables / heatmaps / critical-difference plots, see [`notebooks/bloombench_extended_hpo.ipynb`](notebooks/bloombench_extended_hpo.ipynb) (one-time HPO) and [`notebooks/bloombench_extended.ipynb`](notebooks/bloombench_extended.ipynb) (replication).
|
|
159
|
+
|
|
160
|
+
<br>
|
|
161
|
+
|
|
162
|
+
## Project Structure
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
.
|
|
166
|
+
├── src/pysephone/
|
|
167
|
+
│ ├── benchmarks/ # End-to-end benchmark suites (BloomBench, …)
|
|
168
|
+
│ ├── data/ # Data ingestion and sources (PEP725, GMU Cherry, USA-NPN, AgERA5, AlphaEarth, elevation, …)
|
|
169
|
+
│ ├── dataset/ # Observations, Dataset, Calendar, feature providers, registry
|
|
170
|
+
│ ├── evaluation/ # Evaluation logic and regression metrics
|
|
171
|
+
│ ├── models/ # Model implementations (CF, RF, LSTM, Hybrid, …)
|
|
172
|
+
│ ├── utils/ # Shared utilities
|
|
173
|
+
│ └── visualize/ # Visualisation helpers
|
|
174
|
+
├── notebooks/ # Jupyter notebooks for exploration and analysis
|
|
175
|
+
├── scripts/ # Standalone scripts
|
|
176
|
+
├── tests/ # Test suite
|
|
177
|
+
└── data/ # Raw and processed data (git-ignored)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
<br>
|
|
181
|
+
|
|
182
|
+
## Notebooks
|
|
183
|
+
|
|
184
|
+
| Notebook | Description |
|
|
185
|
+
|---|---|
|
|
186
|
+
| `cherry_blossom_cf_models.ipynb` | Process-based model evaluation on GMU Cherry datasets |
|
|
187
|
+
| `cf_models_pep725_fruit_trees.ipynb` | CF model evaluation across PEP725 fruit tree species |
|
|
188
|
+
| `unusual_year_model_eval.ipynb` | Model comparison on climatologically unusual vs normal years |
|
|
189
|
+
| `unusual_seasons_*.ipynb` | Exploration of unusual seasons in GMU / PEP725 data |
|
|
190
|
+
| `dataset_adequacy_*.ipynb` | Sample sufficiency analysis per dataset |
|
|
191
|
+
| `lstm_cherry_exploration.ipynb` | LSTM model exploration on cherry blossom data |
|
|
192
|
+
| `model_exploration.ipynb` | General model exploration notebook |
|
|
193
|
+
| `pvtt_winter_wheat.ipynb` | PVTT model for winter wheat phenology |
|
|
194
|
+
|
|
195
|
+
<br>
|
|
196
|
+
|
|
197
|
+
## Dependencies
|
|
198
|
+
|
|
199
|
+
Base install: `pandas`, `numpy`, `scipy`, `scikit-learn`, `matplotlib`, `nlopt`, `tables`, `h5py`, `requests`, `requests-cache`, `retry-requests`, `tqdm`, `unidecode`, `platformdirs`.
|
|
200
|
+
|
|
201
|
+
Heavier and source-specific dependencies (`torch`, `xgboost`, `cdsapi`, `openmeteo-requests`, `geopandas`/`shapely`/`rasterio`, `earthengine-api`, …) are opt-in via the [extras](#installation) above.
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pysephone"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A Python library for using machine learning to predict the timing of phenological events in plants"
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "Ron van Bree", email = "ronvbree@gmail.com" }
|
|
11
|
+
]
|
|
12
|
+
license = { file = "LICENSE" }
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
requires-python = ">=3.14"
|
|
15
|
+
keywords = [
|
|
16
|
+
"phenology",
|
|
17
|
+
"machine-learning",
|
|
18
|
+
"bloom",
|
|
19
|
+
"agriculture",
|
|
20
|
+
"remote-sensing",
|
|
21
|
+
]
|
|
22
|
+
classifiers = [
|
|
23
|
+
"Development Status :: 3 - Alpha",
|
|
24
|
+
"Intended Audience :: Science/Research",
|
|
25
|
+
"License :: OSI Approved :: MIT License",
|
|
26
|
+
"Programming Language :: Python :: 3",
|
|
27
|
+
"Topic :: Scientific/Engineering",
|
|
28
|
+
]
|
|
29
|
+
# Base install — lightweight core: datasets, observations, process-based and
|
|
30
|
+
# scikit-learn models, evaluation. Heavy / source-specific dependencies live in
|
|
31
|
+
# the optional-dependency groups below.
|
|
32
|
+
dependencies = [
|
|
33
|
+
"pandas",
|
|
34
|
+
"numpy>=1.23",
|
|
35
|
+
"scipy",
|
|
36
|
+
"scikit-learn",
|
|
37
|
+
"matplotlib",
|
|
38
|
+
"tqdm",
|
|
39
|
+
"requests",
|
|
40
|
+
"requests-cache",
|
|
41
|
+
"retry-requests",
|
|
42
|
+
"unidecode",
|
|
43
|
+
"nlopt",
|
|
44
|
+
"tables",
|
|
45
|
+
"h5py",
|
|
46
|
+
"platformdirs",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
[project.optional-dependencies]
|
|
50
|
+
# Deep-learning models (LSTM, GRU, CNN, Transformer, hybrids, Beta/BSpline GDD).
|
|
51
|
+
deep = ["torch>=2.0"]
|
|
52
|
+
# Gradient-boosted-tree model.
|
|
53
|
+
boost = ["xgboost>=1.7"]
|
|
54
|
+
# AgERA5 (Copernicus CDS) downloading.
|
|
55
|
+
agera5 = ["cdsapi", "xarray", "netCDF4"]
|
|
56
|
+
# Open-Meteo ERA5 downloading.
|
|
57
|
+
openmeteo = ["openmeteo-requests"]
|
|
58
|
+
# Geospatial features (map visualizations, WorldClim rasters).
|
|
59
|
+
geo = ["geopandas", "shapely", "rasterio"]
|
|
60
|
+
# AlphaEarth embedding fetching via Google Earth Engine.
|
|
61
|
+
earthengine = ["earthengine-api"]
|
|
62
|
+
# Statistical model-comparison (Friedman/Nemenyi, critical-difference plots).
|
|
63
|
+
stats = ["scikit-posthocs", "autorank"]
|
|
64
|
+
# Everything — convenience meta-extra (every model + data source). This is a
# superset of what BloomBench needs (deep, boost, agera5, stats).
|
|
65
|
+
all = ["pysephone[deep,boost,agera5,openmeteo,geo,earthengine,stats]"]
|
|
66
|
+
|
|
67
|
+
[project.urls]
|
|
68
|
+
Homepage = "https://github.com/ronvree/pysephone"
|
|
69
|
+
Repository = "https://github.com/ronvree/pysephone"
|
|
70
|
+
|
|
71
|
+
[tool.setuptools]
|
|
72
|
+
# Ship the license and the bundled-data attribution/NOTICE with every artifact.
|
|
73
|
+
license-files = ["LICENSE", "DATA_SOURCES.md"]
|
|
74
|
+
|
|
75
|
+
[tool.setuptools.packages.find]
|
|
76
|
+
where = ["src"]
|
|
77
|
+
|
|
78
|
+
[tool.setuptools.package-data]
|
|
79
|
+
pysephone = [
|
|
80
|
+
"data/**/*.csv",
|
|
81
|
+
"data/**/*.geojson",
|
|
82
|
+
"data/**/*.md",
|
|
83
|
+
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""pysephone — machine-learning models for predicting plant phenology.
|
|
2
|
+
|
|
3
|
+
The lightweight core (datasets, observations, season calendars, the feature-
|
|
4
|
+
provider interface) is exported here. Models live under ``pysephone.models``
|
|
5
|
+
and are loaded lazily so that importing the package does not pull heavyweight
|
|
6
|
+
optional dependencies such as PyTorch.
|
|
7
|
+
"""
|
|
8
|
+
from pysephone.dataset import Dataset, Observations
|
|
9
|
+
from pysephone.dataset.util.calendar import Calendar
|
|
10
|
+
from pysephone.dataset.util.provider import FeatureProvider
|
|
11
|
+
|
|
12
|
+
__all__ = ["Dataset", "Observations", "Calendar", "FeatureProvider"]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Benchmark suites for evaluating phenology models.
|
|
2
|
+
|
|
3
|
+
Each sub-module is a self-contained, reproducible benchmark: a curated
|
|
4
|
+
collection of datasets, a fixed split, a model registry, and an evaluation
|
|
5
|
+
loop.
|
|
6
|
+
|
|
7
|
+
See :mod:`pysephone.benchmarks.bloombench` for the BloomBench fruit-tree
|
|
8
|
+
flowering benchmark.
|
|
9
|
+
"""
|