groundsource 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sharath Sivamalaisamy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include LICENSE
2
+ include README.md
3
+ recursive-include groundsource/data *.shp *.shx *.dbf *.prj *.cpg
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.4
2
+ Name: groundsource
3
+ Version: 0.1.0
4
+ Summary: Python package for Google's Groundsource flash flood dataset — 2.6M events, 150+ countries, 2000–2026
5
+ Author: Sharath Sivamalaisamy
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/sharanry/groundsource
8
+ Project-URL: Repository, https://github.com/sharanry/groundsource
9
+ Project-URL: Issues, https://github.com/sharanry/groundsource/issues
10
+ Keywords: flood,flash-flood,climate,groundsource,google,geospatial,dataset,gemini,natural-disaster,news-mining
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: GIS
21
+ Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: pandas>=1.5
26
+ Requires-Dist: pyarrow>=10.0
27
+ Requires-Dist: geopandas>=0.13
28
+ Requires-Dist: shapely>=2.0
29
+ Requires-Dist: matplotlib>=3.6
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=7.0; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ # groundsource
35
+
36
+ **Python package for Google's Groundsource flash flood dataset.**
37
+
38
+ Google used Gemini to extract 2.6 million flash flood events from news articles across 150+ countries (2000-2026). The raw data is a 667MB Parquet file with undocumented WKB geometries and no location labels. This package decodes the geometries, tags every event with country and continent, and provides a clean search and analysis API.
39
+
40
+ ```python
41
+ from groundsource import FloodDB
42
+
43
+ db = FloodDB() # auto-downloads + enriches on first run
44
+ floods = db.search(country="India", year_range=(2020, 2025))
45
+ ```
46
+
47
+ ## Installation
48
+
49
+ ```bash
50
+ pip install groundsource
51
+ ```
52
+
53
+ **Requirements:** Python 3.9+, pandas, pyarrow, geopandas, shapely, matplotlib
54
+
55
+ On first run, the package downloads the dataset from Zenodo (~667MB), decodes 2.6M WKB polygons, and performs a spatial join against Natural Earth boundaries. This takes 2-3 minutes and is cached locally for instant subsequent loads.
56
+
57
+ ## Usage
58
+
59
+ ### Search
60
+
61
+ ```python
62
+ from groundsource import FloodDB
63
+ db = FloodDB()
64
+
65
+ # By country (supports common aliases: "USA", "UK", "UAE", etc.)
66
+ db.search(country="India")
67
+ db.search(country="USA", year_range=(2020, 2025))
68
+
69
+ # By city (98 major cities built-in, default 100km radius)
70
+ db.search(city="Houston", radius_km=50)
71
+
72
+ # By continent or bounding box
73
+ db.search(continent="Asia")
74
+ db.search(bbox=[0, 95, 25, 120]) # [min_lat, min_lon, max_lat, max_lon]
75
+ ```
76
+
77
+ ### Trend Analysis
78
+
79
+ ```python
80
+ db.trend(country="India") # yearly event counts
81
+ db.growth(country="India") # growth rate between two periods
82
+ db.compare(["USA", "UK", "India", "Indonesia"]) # side-by-side comparison
83
+ db.top_countries(20) # ranked by total events
84
+ db.country_growth_ranking(20) # ranked by growth acceleration
85
+ db.bias_check() # global yearly counts for bias analysis
86
+ ```
87
+
88
+ ### Built-in Charts
89
+
90
+ ```python
91
+ db.plot_hockey_stick(save_path="hockey_stick.png")
92
+ db.plot_bias(save_path="bias.png")
93
+ db.plot_top_countries(save_path="top_countries.png")
94
+ db.plot_country_growth(save_path="growth.png")
95
+ ```
96
+
97
+ ### Raw DataFrame Access
98
+
99
+ ```python
100
+ df = db.to_dataframe()
101
+ # Columns: uuid, area_km2, start_date, end_date, centroid_lon, centroid_lat,
102
+ # country, iso_a3, continent, year
103
+ ```
104
+
105
+ ## What This Package Does
106
+
107
+ The raw Parquet from Zenodo has 5 columns with no documentation:
108
+
109
+ | Raw Column | Type | Issue |
110
+ |-----------|------|-------|
111
+ | `uuid` | string | ID only |
112
+ | `area_km2` | float | Usable as-is |
113
+ | `geometry` | WKB binary | Requires `shapely` to decode |
114
+ | `start_date` | string | Not parsed as datetime |
115
+ | `end_date` | string | Not parsed as datetime |
116
+
117
+ This package enriches each event with:
118
+
119
+ | Added Column | Source |
120
+ |-------------|--------|
121
+ | `centroid_lon`, `centroid_lat` | Decoded from WKB polygons |
122
+ | `country`, `iso_a3` | Spatial join against Natural Earth |
123
+ | `continent` | Natural Earth |
124
+ | `year` | Extracted from `start_date` |
125
+
126
+ ## Reporting Bias
127
+
128
+ The dataset shows 498 events in 2000 and 402,012 in 2024. This does not mean floods increased 807x. The data is extracted from news articles, and digital news coverage grew dramatically over this period. Any trend analysis should account for this reporting bias. Use `db.bias_check()` and `db.plot_bias()` to visualize this.
129
+
130
+ ![Bias Analysis](charts/02_bias_normalized.png)
131
+
132
+ ## Top Countries by Events Detected
133
+
134
+ ![Top Countries](charts/04_top_countries.png)
135
+
136
+ ## Dataset
137
+
138
+ - **Source:** [Google Groundsource](https://research.google/blog/introducing-groundsource-turning-news-reports-into-data-with-gemini/)
139
+ - **Download:** [Zenodo](https://zenodo.org/records/18647054) (CC BY 4.0)
140
+ - **Records:** 2,646,302 events across 175 countries, 2000-2026
141
+ - **Method:** Gemini parsed ~5M news articles
142
+ - **Accuracy:** 60% location+timing, 82% practically useful (per Google)
143
+
144
+ ## License
145
+
146
+ MIT. The underlying dataset is licensed CC BY 4.0 by Google.
147
+
148
+ ## Citation
149
+
150
+ > Google Research. *Groundsource: Turning News Reports into Data with Gemini.* Zenodo, 2026. DOI: [10.5281/zenodo.18647054](https://zenodo.org/records/18647054)
@@ -0,0 +1,117 @@
1
+ # groundsource
2
+
3
+ **Python package for Google's Groundsource flash flood dataset.**
4
+
5
+ Google used Gemini to extract 2.6 million flash flood events from news articles across 150+ countries (2000-2026). The raw data is a 667MB Parquet file with undocumented WKB geometries and no location labels. This package decodes the geometries, tags every event with country and continent, and provides a clean search and analysis API.
6
+
7
+ ```python
8
+ from groundsource import FloodDB
9
+
10
+ db = FloodDB() # auto-downloads + enriches on first run
11
+ floods = db.search(country="India", year_range=(2020, 2025))
12
+ ```
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ pip install groundsource
18
+ ```
19
+
20
+ **Requirements:** Python 3.9+, pandas, pyarrow, geopandas, shapely, matplotlib
21
+
22
+ On first run, the package downloads the dataset from Zenodo (~667MB), decodes 2.6M WKB polygons, and performs a spatial join against Natural Earth boundaries. This takes 2-3 minutes and is cached locally for instant subsequent loads.
23
+
24
+ ## Usage
25
+
26
+ ### Search
27
+
28
+ ```python
29
+ from groundsource import FloodDB
30
+ db = FloodDB()
31
+
32
+ # By country (supports common aliases: "USA", "UK", "UAE", etc.)
33
+ db.search(country="India")
34
+ db.search(country="USA", year_range=(2020, 2025))
35
+
36
+ # By city (98 major cities built-in, default 100km radius)
37
+ db.search(city="Houston", radius_km=50)
38
+
39
+ # By continent or bounding box
40
+ db.search(continent="Asia")
41
+ db.search(bbox=[0, 95, 25, 120]) # [min_lat, min_lon, max_lat, max_lon]
42
+ ```
43
+
44
+ ### Trend Analysis
45
+
46
+ ```python
47
+ db.trend(country="India") # yearly event counts
48
+ db.growth(country="India") # growth rate between two periods
49
+ db.compare(["USA", "UK", "India", "Indonesia"]) # side-by-side comparison
50
+ db.top_countries(20) # ranked by total events
51
+ db.country_growth_ranking(20) # ranked by growth acceleration
52
+ db.bias_check() # global yearly counts for bias analysis
53
+ ```
54
+
55
+ ### Built-in Charts
56
+
57
+ ```python
58
+ db.plot_hockey_stick(save_path="hockey_stick.png")
59
+ db.plot_bias(save_path="bias.png")
60
+ db.plot_top_countries(save_path="top_countries.png")
61
+ db.plot_country_growth(save_path="growth.png")
62
+ ```
63
+
64
+ ### Raw DataFrame Access
65
+
66
+ ```python
67
+ df = db.to_dataframe()
68
+ # Columns: uuid, area_km2, start_date, end_date, centroid_lon, centroid_lat,
69
+ # country, iso_a3, continent, year
70
+ ```
71
+
72
+ ## What This Package Does
73
+
74
+ The raw Parquet from Zenodo has 5 columns with no documentation:
75
+
76
+ | Raw Column | Type | Issue |
77
+ |-----------|------|-------|
78
+ | `uuid` | string | ID only |
79
+ | `area_km2` | float | Usable as-is |
80
+ | `geometry` | WKB binary | Requires `shapely` to decode |
81
+ | `start_date` | string | Not parsed as datetime |
82
+ | `end_date` | string | Not parsed as datetime |
83
+
84
+ This package enriches each event with:
85
+
86
+ | Added Column | Source |
87
+ |-------------|--------|
88
+ | `centroid_lon`, `centroid_lat` | Decoded from WKB polygons |
89
+ | `country`, `iso_a3` | Spatial join against Natural Earth |
90
+ | `continent` | Natural Earth |
91
+ | `year` | Extracted from `start_date` |
92
+
93
+ ## Reporting Bias
94
+
95
+ The dataset shows 498 events in 2000 and 402,012 in 2024. This does not mean floods increased 807x. The data is extracted from news articles, and digital news coverage grew dramatically over this period. Any trend analysis should account for this reporting bias. Use `db.bias_check()` and `db.plot_bias()` to visualize this.
96
+
97
+ ![Bias Analysis](charts/02_bias_normalized.png)
98
+
99
+ ## Top Countries by Events Detected
100
+
101
+ ![Top Countries](charts/04_top_countries.png)
102
+
103
+ ## Dataset
104
+
105
+ - **Source:** [Google Groundsource](https://research.google/blog/introducing-groundsource-turning-news-reports-into-data-with-gemini/)
106
+ - **Download:** [Zenodo](https://zenodo.org/records/18647054) (CC BY 4.0)
107
+ - **Records:** 2,646,302 events across 175 countries, 2000-2026
108
+ - **Method:** Gemini parsed ~5M news articles
109
+ - **Accuracy:** 60% location+timing, 82% practically useful (per Google)
110
+
111
+ ## License
112
+
113
+ MIT. The underlying dataset is licensed CC BY 4.0 by Google.
114
+
115
+ ## Citation
116
+
117
+ > Google Research. *Groundsource: Turning News Reports into Data with Gemini.* Zenodo, 2026. DOI: [10.5281/zenodo.18647054](https://zenodo.org/records/18647054)
@@ -0,0 +1,6 @@
1
+ """groundsource — Python package for Google's Groundsource flash flood dataset."""
2
+
3
+ from groundsource.db import FloodDB
4
+
5
+ __version__ = "0.1.0"
6
+ __all__ = ["FloodDB"]
@@ -0,0 +1,63 @@
1
+ """Download and cache management for the Groundsource dataset."""
2
+
3
+ import os
4
+ import sys
5
+ import urllib.request
6
+ from pathlib import Path
7
+
8
# Direct-download URL for the raw (~667MB) Parquet file hosted on Zenodo.
ZENODO_URL = "https://zenodo.org/records/18647054/files/groundsource_2026.parquet?download=1"
# Filename of the raw download inside the local cache directory.
PARQUET_FILENAME = "groundsource_2026.parquet"
# Filename of the enriched (country/continent-tagged) copy inside the cache.
ENRICHED_FILENAME = "groundsource_enriched.parquet"
11
+
12
+
13
def get_cache_dir() -> Path:
    """Return the per-user cache directory for groundsource, creating it if needed.

    Follows platform conventions: %LOCALAPPDATA% on Windows,
    ~/Library/Caches on macOS, and $XDG_CACHE_HOME (default ~/.cache)
    everywhere else.
    """
    if sys.platform == "win32":
        root = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local"))
    elif sys.platform == "darwin":
        root = Path.home() / "Library" / "Caches"
    else:
        root = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
    target = root / "groundsource"
    target.mkdir(parents=True, exist_ok=True)
    return target
24
+
25
+
26
def get_raw_parquet_path() -> Path:
    """Location of the cached raw Zenodo parquet file."""
    cache = get_cache_dir()
    return cache / PARQUET_FILENAME
28
+
29
+
30
def get_enriched_parquet_path() -> Path:
    """Location of the cached enriched (country-tagged) parquet file."""
    cache = get_cache_dir()
    return cache / ENRICHED_FILENAME
32
+
33
+
34
def download_parquet(force: bool = False) -> Path:
    """Download the raw Parquet from Zenodo if not already cached.

    Parameters
    ----------
    force : bool
        If True, re-download even when a cached copy exists.

    Returns
    -------
    Path
        Path to the cached parquet file.

    The download is written to a ``.part`` temp file and atomically renamed
    into place on success. Previously the file was written directly to the
    final path, so an interrupted download left a truncated file that the
    ``path.exists()`` check above would then treat as a valid cache hit.
    """
    path = get_raw_parquet_path()
    if path.exists() and not force:
        return path

    print(f"Downloading Groundsource dataset ({PARQUET_FILENAME})...")
    print(f"Source: {ZENODO_URL}")
    print(f"Destination: {path}")

    def _progress(block_num, block_size, total_size):
        # urllib invokes this after each chunk; total_size is <= 0 when the
        # server does not report a Content-Length.
        downloaded = block_num * block_size
        if total_size > 0:
            pct = min(100, downloaded * 100 / total_size)
            mb = downloaded / (1024 * 1024)
            total_mb = total_size / (1024 * 1024)
            sys.stdout.write(f"\r {mb:.0f}/{total_mb:.0f} MB ({pct:.1f}%)")
            sys.stdout.flush()

    tmp_path = path.with_suffix(path.suffix + ".part")
    try:
        urllib.request.urlretrieve(ZENODO_URL, str(tmp_path), reporthook=_progress)
        # Atomic on POSIX and Windows: never exposes a half-written file.
        os.replace(tmp_path, path)
    finally:
        # Remove any partial file left behind by a failed/interrupted download.
        if tmp_path.exists():
            tmp_path.unlink()
    print("\n Download complete.")
    return path
56
+
57
+
58
def load_from_local(path: str) -> Path:
    """Use a local Parquet file instead of downloading.

    Raises FileNotFoundError when the given path does not exist.
    """
    candidate = Path(path)
    if candidate.exists():
        return candidate
    raise FileNotFoundError(f"Local parquet not found: {path}")
@@ -0,0 +1,206 @@
1
+ """Chart generators for Groundsource analysis. LinkedIn-worthy matplotlib charts."""
2
+
3
from typing import Optional

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
7
+
8
+
9
# Consistent style for all charts
# These rcParams are applied globally before each figure is drawn.
STYLE = {
    "figure.facecolor": "white",
    "axes.facecolor": "#f8f9fa",  # very light grey plot area
    "axes.grid": True,
    "grid.alpha": 0.3,  # keep grid lines subtle
    "font.family": "sans-serif",
    "font.size": 11,
}
18
+
19
+
20
def _apply_style():
    """Install the shared chart style into matplotlib's global rcParams."""
    for key, value in STYLE.items():
        plt.rcParams[key] = value
22
+
23
+
24
def plot_hockey_stick(yearly_counts: pd.DataFrame, save_path: Optional[str] = None) -> plt.Figure:
    """Chart 1: Total events per year — the 807x hockey stick.

    Parameters
    ----------
    yearly_counts : pd.DataFrame
        DataFrame with columns [year, count]; assumed ordered by year
        (the first row is annotated as the earliest year).
    save_path : str, optional
        If given, the figure is also saved to this path as a PNG.
        (Annotation fixed from the implicit-Optional ``str = None``.)

    Returns
    -------
    plt.Figure
    """
    _apply_style()
    fig, ax = plt.subplots(figsize=(12, 6))

    ax.bar(yearly_counts["year"], yearly_counts["count"],
           color="#1a73e8", alpha=0.85, width=0.8)

    # Annotate the growth: first row vs the peak year, with the growth factor
    # between them (guarded against a zero first-year count).
    first_year = yearly_counts.iloc[0]
    peak_year = yearly_counts.loc[yearly_counts["count"].idxmax()]
    growth = peak_year["count"] / first_year["count"] if first_year["count"] > 0 else 0

    ax.annotate(
        f'{first_year["count"]:,.0f} events',
        xy=(first_year["year"], first_year["count"]),
        xytext=(first_year["year"] + 3, peak_year["count"] * 0.3),
        fontsize=10, color="#666",
        arrowprops=dict(arrowstyle="->", color="#999"),
    )
    ax.annotate(
        f'{peak_year["count"]:,.0f} events\n({growth:,.0f}x growth)',
        xy=(peak_year["year"], peak_year["count"]),
        xytext=(peak_year["year"] - 6, peak_year["count"] * 1.05),
        fontsize=10, fontweight="bold", color="#d93025",
        arrowprops=dict(arrowstyle="->", color="#d93025"),
    )

    ax.set_title("Flash Flood Events Detected Per Year\n— or is it news coverage?",
                 fontsize=16, fontweight="bold", pad=15)
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel("Events Detected", fontsize=12)
    # Thousands separators on the y axis.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f"{x:,.0f}"))

    ax.text(0.02, 0.95,
            "Source: Google Groundsource dataset (2.6M events from news articles, 2000–2026)",
            transform=ax.transAxes, fontsize=8, color="#999", va="top")

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f" Saved: {save_path}")
    return fig
70
+
71
+
72
def plot_bias_normalized(yearly_counts: pd.DataFrame, save_path: Optional[str] = None) -> plt.Figure:
    """Chart 2: Overlay event growth vs estimated digital news growth.

    Uses a simple exponential proxy for global digital news volume.
    Internet users: ~400M (2000) → ~5.5B (2025) ≈ 13.75x
    Online news output grew even faster due to digital-native outlets.
    We use a conservative 15-20x estimate for indexed news articles.

    Parameters
    ----------
    yearly_counts : pd.DataFrame
        Columns [year, count], one row per year.
    save_path : str, optional
        If given, the figure is also saved to this path.

    Returns
    -------
    plt.Figure
    """
    _apply_style()
    fig, ax = plt.subplots(figsize=(12, 6))

    years = yearly_counts["year"].values
    counts = yearly_counts["count"].values

    # Normalize both to year 2007 (when dataset has enough events to be meaningful)
    matches = np.where(years == 2007)[0]
    if len(matches) > 0:
        ref_idx = int(matches[0])
    else:
        # Fallback: use the year closest to 2007. The previous hard-coded
        # index 7 assumed the series starts at 2000 and raised IndexError
        # on series shorter than 8 years.
        ref_idx = int(np.argmin(np.abs(years - 2007)))

    norm_events = counts / counts[ref_idx]

    # Conservative proxy: internet users grew ~14x from 2000 to 2025
    # Online news articles grew faster. Use a simple logistic-like growth curve.
    internet_users_billions = {
        2000: 0.41, 2001: 0.50, 2002: 0.63, 2003: 0.72, 2004: 0.82,
        2005: 1.02, 2006: 1.15, 2007: 1.37, 2008: 1.57, 2009: 1.77,
        2010: 2.02, 2011: 2.23, 2012: 2.49, 2013: 2.73, 2014: 2.96,
        2015: 3.19, 2016: 3.42, 2017: 3.65, 2018: 3.90, 2019: 4.13,
        2020: 4.59, 2021: 4.90, 2022: 5.16, 2023: 5.35, 2024: 5.52,
        2025: 5.56, 2026: 5.60,
    }
    # Years outside the table clamp to the 2026 figure.
    news_proxy = np.array([internet_users_billions.get(y, 5.6) for y in years])
    norm_news = news_proxy / news_proxy[ref_idx]

    ax.plot(years, norm_events, "o-", color="#d93025", linewidth=2.5,
            markersize=5, label="Detected flood events", zorder=3)
    ax.plot(years, norm_news, "s--", color="#1a73e8", linewidth=2,
            markersize=4, label="Internet users (proxy for digital news)", zorder=2)

    # Shade the gap where event growth exceeds the news proxy.
    ax.fill_between(years, norm_events, norm_news,
                    where=(norm_events > norm_news),
                    alpha=0.1, color="#d93025")

    ax.set_title("Are Floods Increasing — or Is News Coverage?\nBoth curves normalized to 2007 = 1.0",
                 fontsize=15, fontweight="bold", pad=15)
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel("Growth relative to 2007", fontsize=12)
    ax.legend(fontsize=11, loc="upper left")
    ax.set_yscale("log")
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f"{x:.0f}x"))

    ax.text(0.02, 0.02,
            "Event detection grows faster than internet adoption — likely because\n"
            "news digitization (articles going online) grew faster than raw user count.",
            transform=ax.transAxes, fontsize=9, color="#666", va="bottom",
            style="italic")

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f" Saved: {save_path}")
    return fig
135
+
136
+
137
def plot_country_growth(growth_df: pd.DataFrame, n: int = 20,
                        save_path: str = None) -> plt.Figure:
    """Chart 3: Countries with highest growth factor — what might be real.

    growth_df: from trends.country_growth_ranking()
    """
    _apply_style()

    # Take the top-n rows, then flip to ascending order so the largest bar
    # lands at the top of the horizontal chart.
    top = growth_df.head(n).copy().sort_values("growth_factor", ascending=True)

    fig, ax = plt.subplots(figsize=(10, 8))

    def _bar_color(factor):
        # Red = explosive growth, orange = strong, blue = moderate.
        if factor > 50:
            return "#d93025"
        if factor > 20:
            return "#ea8600"
        return "#1a73e8"

    bar_colors = [_bar_color(g) for g in top["growth_factor"]]
    ax.barh(top["country"], top["growth_factor"], color=bar_colors, alpha=0.85)

    # Value label just past the end of each bar.
    for pos, factor in enumerate(top["growth_factor"]):
        ax.text(factor + 0.5, pos,
                f"{factor:.0f}x",
                va="center", fontsize=9, fontweight="bold")

    ax.set_title("Which Countries Show the Fastest Growth in Detected Flood Events?\n"
                 "Growth factor: avg events/year (2018–2025) vs (2005–2012)",
                 fontsize=14, fontweight="bold", pad=15)
    ax.set_xlabel("Growth Factor (higher = faster acceleration)", fontsize=11)

    ax.text(0.98, 0.02,
            "⚠ High growth may reflect news digitization, not actual flood increase.\n"
            "Countries with low baseline coverage will show inflated growth.",
            transform=ax.transAxes, fontsize=8, color="#999",
            ha="right", va="bottom", style="italic")

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f" Saved: {save_path}")
    return fig
175
+
176
+
177
def plot_top_countries(country_counts: pd.DataFrame, n: int = 20,
                       save_path: str = None) -> plt.Figure:
    """Chart 4: Top N countries by total event count — the LinkedIn chart."""
    _apply_style()

    # Keep the n largest rows, flipped ascending so the biggest bar is on top.
    ranked = country_counts.head(n).copy().sort_values("count", ascending=True)

    fig, ax = plt.subplots(figsize=(10, 8))

    ax.barh(ranked["country"], ranked["count"], color="#1a73e8", alpha=0.85)

    # Annotate each bar with its formatted total, offset slightly to the right.
    for pos, total in enumerate(ranked["count"]):
        ax.text(total + 500, pos,
                f"{total:,.0f}",
                va="center", fontsize=9)

    ax.set_title("Top 20 Countries by Total Flash Flood Events Detected (2000–2026)",
                 fontsize=14, fontweight="bold", pad=15)
    ax.set_xlabel("Total Events Detected", fontsize=11)
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f"{x:,.0f}"))

    ax.text(0.98, 0.02,
            "Source: Google Groundsource — 2.6M events extracted by Gemini from news articles",
            transform=ax.transAxes, fontsize=8, color="#999", ha="right", va="bottom")

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f" Saved: {save_path}")
    return fig
@@ -0,0 +1 @@
1
+ # Data directory — contains Natural Earth shapefiles bundled with the package.
@@ -0,0 +1 @@
1
+ GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.017453292519943295]]