groundsource 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ """groundsource — Python package for Google's Groundsource flash flood dataset."""
2
+
3
+ from groundsource.db import FloodDB
4
+
5
+ __version__ = "0.1.0"
6
+ __all__ = ["FloodDB"]
groundsource/cache.py ADDED
@@ -0,0 +1,63 @@
1
+ """Download and cache management for the Groundsource dataset."""
2
+
3
+ import os
4
+ import sys
5
+ import urllib.request
6
+ from pathlib import Path
7
+
8
+ ZENODO_URL = "https://zenodo.org/records/18647054/files/groundsource_2026.parquet?download=1"
9
+ PARQUET_FILENAME = "groundsource_2026.parquet"
10
+ ENRICHED_FILENAME = "groundsource_enriched.parquet"
11
+
12
+
13
def get_cache_dir() -> Path:
    """Return (creating it if needed) the per-user cache directory.

    Windows:  %LOCALAPPDATA%\\groundsource
    macOS:    ~/Library/Caches/groundsource
    Linux:    $XDG_CACHE_HOME/groundsource (default: ~/.cache/groundsource)
    """
    home = Path.home()
    if sys.platform == "win32":
        root = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
    elif sys.platform == "darwin":
        root = home / "Library" / "Caches"
    else:
        root = Path(os.environ.get("XDG_CACHE_HOME", home / ".cache"))
    target = root / "groundsource"
    target.mkdir(parents=True, exist_ok=True)
    return target
24
+
25
+
26
def get_raw_parquet_path() -> Path:
    """Location of the cached raw Zenodo parquet inside the cache dir."""
    cache = get_cache_dir()
    return cache / PARQUET_FILENAME
28
+
29
+
30
def get_enriched_parquet_path() -> Path:
    """Location of the cached enriched parquet inside the cache dir."""
    cache = get_cache_dir()
    return cache / ENRICHED_FILENAME
32
+
33
+
34
def download_parquet(force: bool = False) -> Path:
    """Download the raw Parquet from Zenodo if not already cached.

    Args:
        force: Re-download even if a cached copy already exists.

    Returns:
        Path to the cached parquet file.

    The payload is first written to a ``.part`` sibling and renamed into
    place only on success, so an interrupted download can never be
    mistaken for a valid cache hit on the next call (the previous code
    wrote directly to the final path, leaving truncated files behind).
    """
    path = get_raw_parquet_path()
    if path.exists() and not force:
        return path

    print(f"Downloading Groundsource dataset ({PARQUET_FILENAME})...")
    print(f"Source: {ZENODO_URL}")
    print(f"Destination: {path}")

    def _progress(block_num, block_size, total_size):
        # urlretrieve reporthook: invoked after each chunk with the chunk
        # count and size; total_size is -1 when the server omits it.
        downloaded = block_num * block_size
        if total_size > 0:
            pct = min(100, downloaded * 100 / total_size)
            mb = downloaded / (1024 * 1024)
            total_mb = total_size / (1024 * 1024)
            sys.stdout.write(f"\r {mb:.0f}/{total_mb:.0f} MB ({pct:.1f}%)")
            sys.stdout.flush()

    tmp_path = path.with_suffix(path.suffix + ".part")
    try:
        urllib.request.urlretrieve(ZENODO_URL, str(tmp_path), reporthook=_progress)
    except BaseException:
        # Remove the partial file so a retry starts from a clean slate.
        tmp_path.unlink(missing_ok=True)
        raise
    # Atomic on POSIX; on Windows os.replace still overwrites in one step.
    os.replace(tmp_path, path)
    print("\n Download complete.")
    return path
56
+
57
+
58
def load_from_local(path: str) -> Path:
    """Validate a user-supplied local Parquet file and return it as a Path.

    Raises:
        FileNotFoundError: if no file exists at *path*.
    """
    candidate = Path(path)
    if candidate.exists():
        return candidate
    raise FileNotFoundError(f"Local parquet not found: {path}")
groundsource/charts.py ADDED
@@ -0,0 +1,206 @@
1
+ """Chart generators for Groundsource analysis. LinkedIn-worthy matplotlib charts."""
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+ import matplotlib.ticker as ticker
7
+
8
+
9
# Shared rcParams so every chart in this module renders with the same look.
STYLE = {
    "figure.facecolor": "white",
    "axes.facecolor": "#f8f9fa",
    "axes.grid": True,
    "grid.alpha": 0.3,
    "font.family": "sans-serif",
    "font.size": 11,
}


def _apply_style() -> None:
    """Push the shared STYLE dict into matplotlib's global rcParams."""
    plt.rcParams.update(STYLE)
22
+
23
+
24
def plot_hockey_stick(yearly_counts: pd.DataFrame, save_path: str = None) -> plt.Figure:
    """Chart 1: Total events per year — the 807x hockey stick.

    Args:
        yearly_counts: DataFrame with columns [year, count].
            NOTE(review): iloc[0] below treats the first row as the earliest
            year, so the frame is assumed sorted ascending by year — confirm
            with callers (groupby("year") output is sorted, so this holds
            for trends.yearly_counts/bias_check).
        save_path: optional file path; when given the figure is also
            written to disk at 150 dpi.

    Returns:
        The matplotlib Figure (displayed/saved as a bar chart of counts).
    """
    _apply_style()
    fig, ax = plt.subplots(figsize=(12, 6))

    ax.bar(yearly_counts["year"], yearly_counts["count"],
           color="#1a73e8", alpha=0.85, width=0.8)

    # Annotate the growth: first row vs the peak-count row.
    first_year = yearly_counts.iloc[0]
    peak_year = yearly_counts.loc[yearly_counts["count"].idxmax()]
    # Guard the ratio against a zero first-year count (ZeroDivisionError).
    growth = peak_year["count"] / first_year["count"] if first_year["count"] > 0 else 0

    ax.annotate(
        f'{first_year["count"]:,.0f} events',
        xy=(first_year["year"], first_year["count"]),
        xytext=(first_year["year"] + 3, peak_year["count"] * 0.3),
        fontsize=10, color="#666",
        arrowprops=dict(arrowstyle="->", color="#999"),
    )
    ax.annotate(
        f'{peak_year["count"]:,.0f} events\n({growth:,.0f}x growth)',
        xy=(peak_year["year"], peak_year["count"]),
        xytext=(peak_year["year"] - 6, peak_year["count"] * 1.05),
        fontsize=10, fontweight="bold", color="#d93025",
        arrowprops=dict(arrowstyle="->", color="#d93025"),
    )

    ax.set_title("Flash Flood Events Detected Per Year\n— or is it news coverage?",
                 fontsize=16, fontweight="bold", pad=15)
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel("Events Detected", fontsize=12)
    # Thousands separators on the y axis.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f"{x:,.0f}"))

    ax.text(0.02, 0.95,
            "Source: Google Groundsource dataset (2.6M events from news articles, 2000–2026)",
            transform=ax.transAxes, fontsize=8, color="#999", va="top")

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f" Saved: {save_path}")
    return fig
70
+
71
+
72
def plot_bias_normalized(yearly_counts: pd.DataFrame, save_path: str = None) -> plt.Figure:
    """Chart 2: Overlay event growth vs estimated digital news growth.

    Uses a simple exponential proxy for global digital news volume.
    Internet users: ~400M (2000) → ~5.5B (2025) ≈ 13.75x
    Online news output grew even faster due to digital-native outlets.
    We use a conservative 15-20x estimate for indexed news articles.

    Args:
        yearly_counts: DataFrame with columns [year, count].
        save_path: optional file path; figure is also saved there when given.

    Returns:
        The matplotlib Figure (log-scale comparison of the two curves).
    """
    _apply_style()
    fig, ax = plt.subplots(figsize=(12, 6))

    years = yearly_counts["year"].values
    counts = yearly_counts["count"].values

    # Normalize both to year 2007 (when dataset has enough events to be meaningful)
    ref_idx = np.where(years == 2007)[0]
    if len(ref_idx) == 0:
        # Fallback when 2007 is absent: prefer the 8th row but clamp to the
        # last available index — the previous hard-coded [7] raised
        # IndexError for series shorter than 8 rows.
        ref_idx = [min(7, len(years) - 1)]
    ref_idx = ref_idx[0]

    norm_events = counts / counts[ref_idx]

    # Conservative proxy: internet users grew ~14x from 2000 to 2025
    # Online news articles grew faster. Use a simple logistic-like growth curve.
    internet_users_billions = {
        2000: 0.41, 2001: 0.50, 2002: 0.63, 2003: 0.72, 2004: 0.82,
        2005: 1.02, 2006: 1.15, 2007: 1.37, 2008: 1.57, 2009: 1.77,
        2010: 2.02, 2011: 2.23, 2012: 2.49, 2013: 2.73, 2014: 2.96,
        2015: 3.19, 2016: 3.42, 2017: 3.65, 2018: 3.90, 2019: 4.13,
        2020: 4.59, 2021: 4.90, 2022: 5.16, 2023: 5.35, 2024: 5.52,
        2025: 5.56, 2026: 5.60,
    }
    # Years outside the table saturate at the 2026 value.
    news_proxy = np.array([internet_users_billions.get(y, 5.6) for y in years])
    norm_news = news_proxy / news_proxy[ref_idx]

    ax.plot(years, norm_events, "o-", color="#d93025", linewidth=2.5,
            markersize=5, label="Detected flood events", zorder=3)
    ax.plot(years, norm_news, "s--", color="#1a73e8", linewidth=2,
            markersize=4, label="Internet users (proxy for digital news)", zorder=2)

    # Shade only where event growth outpaces the news proxy.
    ax.fill_between(years, norm_events, norm_news,
                    where=(norm_events > norm_news),
                    alpha=0.1, color="#d93025")

    ax.set_title("Are Floods Increasing — or Is News Coverage?\nBoth curves normalized to 2007 = 1.0",
                 fontsize=15, fontweight="bold", pad=15)
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel("Growth relative to 2007", fontsize=12)
    ax.legend(fontsize=11, loc="upper left")
    ax.set_yscale("log")
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f"{x:.0f}x"))

    ax.text(0.02, 0.02,
            "Event detection grows faster than internet adoption — likely because\n"
            "news digitization (articles going online) grew faster than raw user count.",
            transform=ax.transAxes, fontsize=9, color="#666", va="bottom",
            style="italic")

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f" Saved: {save_path}")
    return fig
135
+
136
+
137
def plot_country_growth(growth_df: pd.DataFrame, n: int = 20,
                        save_path: str = None) -> plt.Figure:
    """Chart 3: Countries with highest growth factor — what might be real.

    Args:
        growth_df: from trends.country_growth_ranking(); must contain
            columns "country" and "growth_factor".
        n: number of top rows to plot.
        save_path: optional file path; figure is also saved there when given.

    Returns:
        The matplotlib Figure (horizontal bar chart).
    """
    _apply_style()
    top = growth_df.head(n).copy()
    # barh draws bottom-up, so ascending order puts the biggest bar on top.
    top = top.sort_values("growth_factor", ascending=True)  # horizontal bar, ascending

    fig, ax = plt.subplots(figsize=(10, 8))

    # Color-code severity: red >50x, orange >20x, blue otherwise.
    colors = ["#d93025" if g > 50 else "#ea8600" if g > 20 else "#1a73e8"
              for g in top["growth_factor"]]

    ax.barh(top["country"], top["growth_factor"], color=colors, alpha=0.85)

    # Label each bar with its growth factor just past the bar end.
    for i, (_, row) in enumerate(top.iterrows()):
        ax.text(row["growth_factor"] + 0.5, i,
                f'{row["growth_factor"]:.0f}x',
                va="center", fontsize=9, fontweight="bold")

    ax.set_title("Which Countries Show the Fastest Growth in Detected Flood Events?\n"
                 "Growth factor: avg events/year (2018–2025) vs (2005–2012)",
                 fontsize=14, fontweight="bold", pad=15)
    ax.set_xlabel("Growth Factor (higher = faster acceleration)", fontsize=11)

    ax.text(0.98, 0.02,
            "⚠ High growth may reflect news digitization, not actual flood increase.\n"
            "Countries with low baseline coverage will show inflated growth.",
            transform=ax.transAxes, fontsize=8, color="#999",
            ha="right", va="bottom", style="italic")

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f" Saved: {save_path}")
    return fig
175
+
176
+
177
def plot_top_countries(country_counts: pd.DataFrame, n: int = 20,
                       save_path: str = None) -> plt.Figure:
    """Chart 4: Top N countries by total event count — the LinkedIn chart.

    Args:
        country_counts: DataFrame with columns "country" and "count",
            sorted descending (as produced by trends.top_countries).
        n: number of rows to plot.
        save_path: optional file path; figure is also saved there when given.

    Returns:
        The matplotlib Figure (horizontal bar chart).
    """
    _apply_style()
    top = country_counts.head(n).copy()
    # barh draws bottom-up, so ascending order puts the biggest bar on top.
    top = top.sort_values("count", ascending=True)

    fig, ax = plt.subplots(figsize=(10, 8))

    ax.barh(top["country"], top["count"], color="#1a73e8", alpha=0.85)

    # Label each bar with its total, offset 500 units past the bar end
    # (offset is in data units, tuned for counts in the thousands).
    for i, (_, row) in enumerate(top.iterrows()):
        ax.text(row["count"] + 500, i,
                f'{row["count"]:,.0f}',
                va="center", fontsize=9)

    ax.set_title("Top 20 Countries by Total Flash Flood Events Detected (2000–2026)",
                 fontsize=14, fontweight="bold", pad=15)
    ax.set_xlabel("Total Events Detected", fontsize=11)
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f"{x:,.0f}"))

    ax.text(0.98, 0.02,
            "Source: Google Groundsource — 2.6M events extracted by Gemini from news articles",
            transform=ax.transAxes, fontsize=8, color="#999", ha="right", va="bottom")

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        print(f" Saved: {save_path}")
    return fig
@@ -0,0 +1 @@
1
+ # Data directory — contains Natural Earth shapefiles bundled with the package.
@@ -0,0 +1 @@
1
+ UTF-8
@@ -0,0 +1 @@
1
+ GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.017453292519943295]]
groundsource/db.py ADDED
@@ -0,0 +1,322 @@
1
+ """FloodDB — Main interface to the Groundsource flash flood dataset."""
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
+ from groundsource.cache import (
7
+ download_parquet,
8
+ get_enriched_parquet_path,
9
+ load_from_local,
10
+ )
11
+ from groundsource.spatial import enrich_dataframe, search_by_radius, search_by_bbox
12
+ from groundsource import trends as _trends
13
+ from groundsource import charts as _charts
14
+
15
+ # Common country name aliases -> Natural Earth canonical name
16
+ _COUNTRY_ALIASES = {
17
+ "usa": "United States of America",
18
+ "us": "United States of America",
19
+ "united states": "United States of America",
20
+ "america": "United States of America",
21
+ "uk": "United Kingdom",
22
+ "britain": "United Kingdom",
23
+ "great britain": "United Kingdom",
24
+ "england": "United Kingdom",
25
+ "uae": "United Arab Emirates",
26
+ "south korea": "South Korea",
27
+ "korea": "South Korea",
28
+ "north korea": "North Korea",
29
+ "dr congo": "Dem. Rep. Congo",
30
+ "democratic republic of congo": "Dem. Rep. Congo",
31
+ "drc": "Dem. Rep. Congo",
32
+ "congo": "Dem. Rep. Congo",
33
+ "ivory coast": "Ivory Coast",
34
+ "cote d'ivoire": "Ivory Coast",
35
+ "czech republic": "Czechia",
36
+ "czechia": "Czechia",
37
+ "bosnia": "Bosnia and Herz.",
38
+ "dominican republic": "Dominican Rep.",
39
+ "central african republic": "Central African Rep.",
40
+ }
41
+
42
+ # Simple built-in city geocoding (major cities only, no external API needed)
43
+ # Format: city_name -> (lon, lat)
44
+ _CITY_COORDS = {
45
+ "houston": (-95.37, 29.76), "mumbai": (72.88, 19.08),
46
+ "jakarta": (106.85, -6.21), "lagos": (3.39, 6.52),
47
+ "dhaka": (90.41, 23.81), "bangkok": (100.50, 13.76),
48
+ "manila": (120.98, 14.60), "kolkata": (88.36, 22.57),
49
+ "chennai": (80.27, 13.08), "delhi": (77.21, 28.61),
50
+ "new york": (-74.01, 40.71), "los angeles": (-118.24, 34.05),
51
+ "chicago": (-87.63, 41.88), "miami": (-80.19, 25.76),
52
+ "london": (-0.13, 51.51), "paris": (2.35, 48.86),
53
+ "berlin": (13.40, 52.52), "rome": (12.50, 41.90),
54
+ "madrid": (-3.70, 40.42), "tokyo": (139.69, 35.69),
55
+ "beijing": (116.41, 39.90), "shanghai": (121.47, 31.23),
56
+ "sydney": (151.21, -33.87), "melbourne": (144.96, -37.81),
57
+ "sao paulo": (-46.63, -23.55), "rio de janeiro": (-43.17, -22.91),
58
+ "buenos aires": (-58.38, -34.60), "mexico city": (-99.13, 19.43),
59
+ "cairo": (31.24, 30.04), "nairobi": (36.82, -1.29),
60
+ "johannesburg": (28.05, -26.20), "cape town": (18.42, -33.93),
61
+ "istanbul": (28.98, 41.01), "moscow": (37.62, 55.76),
62
+ "dubai": (55.30, 25.20), "singapore": (103.85, 1.29),
63
+ "kuala lumpur": (101.69, 3.14), "hanoi": (105.85, 21.03),
64
+ "ho chi minh city": (106.63, 10.82), "seoul": (126.98, 37.57),
65
+ "osaka": (135.50, 34.69), "lima": (-77.04, -12.05),
66
+ "bogota": (-74.07, 4.71), "santiago": (-70.65, -33.45),
67
+ "accra": (-0.19, 5.56), "kinshasa": (15.31, -4.32),
68
+ "addis ababa": (38.75, 9.02), "dar es salaam": (39.27, -6.79),
69
+ "karachi": (67.01, 24.86), "lahore": (74.35, 31.56),
70
+ "islamabad": (73.05, 33.69), "kabul": (69.17, 34.53),
71
+ "kathmandu": (85.32, 27.72), "colombo": (79.86, 6.93),
72
+ "yangon": (96.15, 16.87), "phnom penh": (104.92, 11.56),
73
+ "taipei": (121.57, 25.03), "hong kong": (114.17, 22.32),
74
+ "amsterdam": (4.90, 52.37), "brussels": (4.35, 50.85),
75
+ "vienna": (16.37, 48.21), "zurich": (8.54, 47.38),
76
+ "munich": (11.58, 48.14), "milan": (9.19, 45.46),
77
+ "barcelona": (2.17, 41.39), "lisbon": (-9.14, 38.74),
78
+ "athens": (23.73, 37.98), "bucharest": (26.10, 44.43),
79
+ "warsaw": (21.01, 52.23), "prague": (14.42, 50.08),
80
+ "budapest": (19.04, 47.50), "stockholm": (18.07, 59.33),
81
+ "oslo": (10.75, 59.91), "copenhagen": (12.57, 55.68),
82
+ "helsinki": (24.94, 60.17), "dublin": (-6.26, 53.35),
83
+ "toronto": (-79.38, 43.65), "vancouver": (-123.12, 49.28),
84
+ "montreal": (-73.57, 45.50), "abuja": (7.49, 9.06),
85
+ "new orleans": (-90.07, 29.95), "denver": (-104.99, 39.74),
86
+ "atlanta": (-84.39, 33.75), "dallas": (-96.80, 32.78),
87
+ "seattle": (-122.33, 47.61), "san francisco": (-122.42, 37.77),
88
+ "phoenix": (-112.07, 33.45), "las vegas": (-115.14, 36.17),
89
+ "washington": (-77.04, 38.91), "boston": (-71.06, 42.36),
90
+ "philadelphia": (-75.17, 39.95), "detroit": (-83.05, 42.33),
91
+ "minneapolis": (-93.27, 44.98), "portland": (-122.68, 45.52),
92
+ "pittsburgh": (-79.99, 40.44), "st louis": (-90.20, 38.63),
93
+ "nashville": (-86.78, 36.16), "charlotte": (-80.84, 35.23),
94
+ }
95
+
96
+
97
+ def _resolve_country(name: str, df: pd.DataFrame) -> pd.Series:
98
+ """Resolve a country name to a boolean mask, handling aliases and fuzzy matching."""
99
+ lower = name.lower()
100
+
101
+ # Check aliases first
102
+ if lower in _COUNTRY_ALIASES:
103
+ canonical = _COUNTRY_ALIASES[lower]
104
+ return df["country"] == canonical
105
+
106
+ # Exact case-insensitive match
107
+ mask = df["country"].str.lower() == lower
108
+ if mask.sum() > 0:
109
+ return mask
110
+
111
+ # Substring match as fallback
112
+ return df["country"].str.contains(name, case=False, na=False)
113
+
114
+
115
class FloodDB:
    """Interface to Google's Groundsource flash flood dataset (2.6M events, 2000–2026).

    Usage:
        from groundsource import FloodDB
        db = FloodDB()  # auto-downloads from Zenodo if not cached
        floods = db.search(country="India")
        trend = db.trend("India")
    """

    def __init__(self, local_path: str = None):
        """Load the Groundsource dataset.

        Args:
            local_path: Path to a local copy of groundsource_2026.parquet.
                If None, downloads from Zenodo and caches automatically.
                NOTE: when an enriched cache already exists it takes
                precedence and local_path is not read.
        """
        enriched_path = get_enriched_parquet_path()

        if enriched_path.exists():
            # Fast path: reuse the enriched cache, skipping both the
            # download and the expensive spatial join.
            # (Fixed: this message was an f-string with no placeholders.)
            print("Loading enriched dataset from cache...")
            self._df = pd.read_parquet(enriched_path)
            print(f" Loaded {len(self._df):,} events.")
        else:
            # Load raw parquet
            if local_path:
                raw_path = load_from_local(local_path)
            else:
                raw_path = download_parquet()

            print("Loading raw dataset...")
            raw_df = pd.read_parquet(raw_path)
            print(f" Loaded {len(raw_df):,} raw events.")

            # Enrich with country/continent columns via spatial join (slow, one-time)
            print("Enriching with country/continent data (one-time, ~2 min)...")
            self._df = enrich_dataframe(raw_df)

            # Cache enriched version so future constructions hit the fast path
            print(f" Caching enriched dataset to {enriched_path}...")
            self._df.to_parquet(enriched_path, index=False)
            print(" Cached. Future loads will be instant.")

    @property
    def df(self) -> pd.DataFrame:
        """Access the full enriched DataFrame (live reference — do not mutate)."""
        return self._df

    def __len__(self) -> int:
        return len(self._df)

    def __repr__(self) -> str:
        years = f"{self._df['year'].min()}–{self._df['year'].max()}"
        countries = self._df["country"].nunique()
        return f"FloodDB({len(self._df):,} events, {years}, {countries} countries)"

    def info(self) -> dict:
        """Summary statistics about the dataset."""
        df = self._df
        return {
            "total_events": len(df),
            "date_range": (df["start_date"].min(), df["start_date"].max()),
            "countries": df["country"].nunique(),
            "continents": sorted(df["continent"].dropna().unique().tolist()),
            "untagged_events": int(df["country"].isna().sum()),
            "columns": list(df.columns),
        }

    def to_dataframe(self) -> pd.DataFrame:
        """Return a copy of the full enriched dataset as a pandas DataFrame."""
        return self._df.copy()

    @staticmethod
    def available_cities() -> list:
        """List all city names available for city search."""
        return sorted(_CITY_COORDS.keys())

    def available_countries(self) -> list:
        """List all country names in the dataset."""
        return sorted(self._df["country"].dropna().unique().tolist())

    # ── Search ──────────────────────────────────────────────────────────

    def search(self, country: str = None, continent: str = None,
               city: str = None, radius_km: float = 100,
               bbox: list = None,
               year_range: tuple = None) -> pd.DataFrame:
        """Search for flood events by location and/or time.

        Filters combine with AND semantics, applied in the order:
        year_range, continent, country, city radius, bbox.

        Args:
            country: Country name (e.g., "India", "United States of America")
            continent: Continent name (e.g., "Asia", "Africa")
            city: City name for radius search (e.g., "Houston")
            radius_km: Radius in km for city search (default 100)
            bbox: Bounding box [min_lat, min_lon, max_lat, max_lon]
            year_range: Tuple of (start_year, end_year) inclusive

        Returns:
            Filtered DataFrame of matching events (a copy).

        Raises:
            ValueError: if city is not in the built-in gazetteer.
        """
        result = self._df

        if year_range:
            result = result[(result["year"] >= year_range[0]) & (result["year"] <= year_range[1])]

        if continent:
            result = result[result["continent"].str.lower().fillna("") == continent.lower()]

        if country:
            mask = _resolve_country(country, result)
            result = result[mask]

        if city:
            city_lower = city.lower()
            if city_lower in _CITY_COORDS:
                lon, lat = _CITY_COORDS[city_lower]
                result = search_by_radius(result, lon, lat, radius_km)
            else:
                raise ValueError(
                    f"City '{city}' not in built-in gazetteer. "
                    f"Use bbox=[min_lat, min_lon, max_lat, max_lon] or "
                    f"search(country=...) instead. "
                    f"Available cities: {len(_CITY_COORDS)}"
                )

        if bbox:
            result = search_by_bbox(result, bbox)

        return result.copy()

    # ── Trend Analysis ──────────────────────────────────────────────────

    def trend(self, country: str = None, continent: str = None) -> pd.DataFrame:
        """Yearly event counts, optionally filtered by country/continent.

        Returns DataFrame with columns: year, count
        """
        df = self._df
        if country:
            df = df[_resolve_country(country, df)]
        if continent:
            df = df[df["continent"].str.lower().fillna("") == continent.lower()]
        return _trends.yearly_counts(df)

    def growth(self, country: str = None, continent: str = None,
               early: tuple = (2005, 2012), recent: tuple = (2018, 2025)) -> dict:
        """Compare average yearly events between two periods."""
        counts = self.trend(country=country, continent=continent)
        return _trends.growth_rate(counts, early=early, recent=recent)

    def compare(self, countries: list) -> pd.DataFrame:
        """Side-by-side yearly counts for multiple countries.

        Accepts aliases (e.g., "USA", "UK") and case-insensitive names.
        """
        # Resolve each requested name to the canonical form used in the data
        resolved = []
        for c in countries:
            lower = c.lower()
            if lower in _COUNTRY_ALIASES:
                resolved.append(_COUNTRY_ALIASES[lower])
            else:
                # Case-insensitive exact match against the dataset
                mask = self._df["country"].str.lower() == lower
                if mask.any():
                    resolved.append(self._df.loc[mask, "country"].iloc[0])
                else:
                    resolved.append(c)  # pass through, will show zeros
        return _trends.compare_countries(self._df, resolved)

    def top_countries(self, n: int = 20) -> pd.DataFrame:
        """Top N countries by total event count."""
        return _trends.top_countries(self._df, n=n)

    def country_growth_ranking(self, n: int = 20, **kwargs) -> pd.DataFrame:
        """Rank countries by flood event growth rate."""
        return _trends.country_growth_ranking(self._df, n=n, **kwargs)

    def bias_check(self) -> pd.DataFrame:
        """Global yearly event counts for bias analysis.

        Returns the hockey-stick curve that should be compared against
        digital news growth. If the curves track together, the dataset's
        growth reflects news digitization, not increasing floods.
        """
        return _trends.bias_check(self._df)

    # ── Charts ──────────────────────────────────────────────────────────

    def plot_hockey_stick(self, save_path: str = None):
        """Chart 1: The 807x hockey stick — events per year."""
        data = self.bias_check()
        return _charts.plot_hockey_stick(data, save_path=save_path)

    def plot_bias(self, save_path: str = None):
        """Chart 2: Event growth vs internet/news growth."""
        data = self.bias_check()
        return _charts.plot_bias_normalized(data, save_path=save_path)

    def plot_country_growth(self, n: int = 20, save_path: str = None, **kwargs):
        """Chart 3: Countries with fastest flood event growth."""
        data = self.country_growth_ranking(n=n, **kwargs)
        return _charts.plot_country_growth(data, n=n, save_path=save_path)

    def plot_top_countries(self, n: int = 20, save_path: str = None):
        """Chart 4: Top N countries by total events — the LinkedIn chart."""
        data = self.top_countries(n=n)
        return _charts.plot_top_countries(data, n=n, save_path=save_path)
@@ -0,0 +1,131 @@
1
+ """Spatial operations: WKB decoding, country tagging, geocoding, search."""
2
+
3
+ from pathlib import Path
4
+ from math import radians, cos, sin, asin, sqrt
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import geopandas as gpd
9
+ from shapely import wkb
10
+ from shapely.geometry import Point
11
+
12
+
13
+ # Path to bundled Natural Earth shapefile
14
+ _DATA_DIR = Path(__file__).parent / "data"
15
+ _NE_SHP = _DATA_DIR / "ne_110m_admin_0_countries.shp"
16
+
17
+
18
def decode_centroids(geometry_series: pd.Series) -> pd.DataFrame:
    """Decode WKB geometry bytes to centroid lon/lat columns.

    Returns DataFrame with columns: centroid_lon, centroid_lat
    """
    n = len(geometry_series)
    # Preallocate the output arrays; one parse + centroid per row.
    xs = np.empty(n, dtype=np.float64)
    ys = np.empty(n, dtype=np.float64)

    for i, raw in enumerate(geometry_series):
        centroid = wkb.loads(raw).centroid
        xs[i] = centroid.x
        ys[i] = centroid.y

    return pd.DataFrame({"centroid_lon": xs, "centroid_lat": ys})
33
+
34
+
35
def tag_countries(lons: np.ndarray, lats: np.ndarray) -> pd.DataFrame:
    """Spatial join centroids against Natural Earth to get country/continent.

    Returns DataFrame with columns: country, iso_a3, continent
    """
    if not _NE_SHP.exists():
        raise FileNotFoundError(
            f"Natural Earth shapefile not found at {_NE_SHP}. "
            "Please reinstall the groundsource package."
        )

    world_polys = gpd.read_file(_NE_SHP)[["NAME", "ISO_A3", "CONTINENT", "geometry"]]

    pts = gpd.GeoDataFrame(
        {"idx": np.arange(len(lons))},
        geometry=gpd.points_from_xy(lons, lats),
        crs="EPSG:4326",
    )

    tagged = gpd.sjoin(pts, world_polys, how="left", predicate="within")

    # A point exactly on a border can join to multiple polygons; keep the
    # first match and restore the original row order.
    tagged = (
        tagged.drop_duplicates(subset="idx", keep="first")
        .sort_values("idx")
        .reset_index(drop=True)
    )

    return pd.DataFrame({
        "country": tagged["NAME"].values,
        "iso_a3": tagged["ISO_A3"].values,
        "continent": tagged["CONTINENT"].values,
    })
65
+
66
+
67
def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Add centroid_lon, centroid_lat, country, iso_a3, continent to DataFrame."""
    print(" Decoding WKB geometries (2.6M polygons)...")
    centroids = decode_centroids(df["geometry"])

    print(" Tagging countries via spatial join...")
    tags = tag_countries(centroids["centroid_lon"].values, centroids["centroid_lat"].values)

    enriched = df.copy()
    enriched["centroid_lon"] = centroids["centroid_lon"].values
    enriched["centroid_lat"] = centroids["centroid_lat"].values
    enriched["country"] = tags["country"].values
    enriched["iso_a3"] = tags["iso_a3"].values
    enriched["continent"] = tags["continent"].values

    # Normalize dates and derive the year column used by all trend analysis.
    enriched["start_date"] = pd.to_datetime(enriched["start_date"])
    enriched["end_date"] = pd.to_datetime(enriched["end_date"])
    enriched["year"] = enriched["start_date"].dt.year

    # Raw WKB bytes were only needed for centroid extraction — drop them.
    enriched = enriched.drop(columns=["geometry"])

    missing = enriched["country"].isna().sum()
    share = missing / len(enriched) * 100
    print(f" Enrichment complete. {missing:,} events ({share:.1f}%) "
          f"fell outside country boundaries (ocean/border).")

    return enriched
96
+
97
+
98
def haversine_km(lon1, lat1, lon2, lat2):
    """Haversine distance in km between two points."""
    rlon1, rlat1, rlon2, rlat2 = (radians(v) for v in (lon1, lat1, lon2, lat2))
    half_dlat = (rlat2 - rlat1) / 2
    half_dlon = (rlon2 - rlon1) / 2
    # Square of half the chord length between the points.
    chord = sin(half_dlat) ** 2 + cos(rlat1) * cos(rlat2) * sin(half_dlon) ** 2
    # 6371 km: mean Earth radius.
    return 2 * 6371 * asin(sqrt(chord))
105
+
106
+
107
def search_by_radius(df: pd.DataFrame, lon: float, lat: float, radius_km: float) -> pd.DataFrame:
    """Filter events within radius_km of a point using vectorized haversine."""
    ev_lon = np.radians(df["centroid_lon"].values)
    ev_lat = np.radians(df["centroid_lat"].values)
    ref_lon = radians(lon)
    ref_lat = radians(lat)

    half_dlat = (ref_lat - ev_lat) / 2
    half_dlon = (ref_lon - ev_lon) / 2
    chord = np.sin(half_dlat) ** 2 + np.cos(ev_lat) * np.cos(ref_lat) * np.sin(half_dlon) ** 2
    # 6371 km: mean Earth radius.
    distances = 6371 * 2 * np.arcsin(np.sqrt(chord))

    return df[distances <= radius_km].copy()
120
+
121
+
122
def search_by_bbox(df: pd.DataFrame, bbox: list) -> pd.DataFrame:
    """Filter events within bounding box [min_lat, min_lon, max_lat, max_lon]."""
    lat_lo, lon_lo, lat_hi, lon_hi = bbox
    # Series.between is inclusive at both ends, matching >= / <= bounds.
    in_lat = df["centroid_lat"].between(lat_lo, lat_hi)
    in_lon = df["centroid_lon"].between(lon_lo, lon_hi)
    return df[in_lat & in_lon].copy()
groundsource/trends.py ADDED
@@ -0,0 +1,113 @@
1
+ """Trend analysis and bias detection for flood event data."""
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
+
7
def yearly_counts(df: pd.DataFrame, group_col: str = None, group_val: str = None) -> pd.DataFrame:
    """Count events per year, optionally filtered by a group column.

    Returns DataFrame with columns: year, count
    """
    # Filter only when both the column and the value are provided.
    data = df[df[group_col] == group_val] if group_col and group_val else df
    return data.groupby("year").size().reset_index(name="count")
18
+
19
+
20
def growth_rate(counts: pd.DataFrame, early=(2000, 2012), recent=(2015, 2025)) -> dict:
    """Compare average yearly counts between two periods.

    Returns dict with early_avg, recent_avg, growth_factor.
    """
    def _window_mean(bounds):
        # Mean count over an inclusive year window; NaN when the window is empty.
        window = counts[counts["year"].between(bounds[0], bounds[1])]
        return window["count"].mean()

    early_avg = _window_mean(early)
    recent_avg = _window_mean(recent)

    if early_avg == 0 or pd.isna(early_avg):
        factor = float("inf") if recent_avg > 0 else 0
    else:
        factor = recent_avg / early_avg

    return {
        "early_period": early,
        "recent_period": recent,
        "early_avg": 0.0 if pd.isna(early_avg) else float(round(early_avg, 1)),
        "recent_avg": 0.0 if pd.isna(recent_avg) else float(round(recent_avg, 1)),
        "growth_factor": float(round(factor, 1)),
    }
40
+
41
+
42
def top_countries(df: pd.DataFrame, n: int = 20) -> pd.DataFrame:
    """Top N countries by total event count."""
    totals = df.groupby("country").size().reset_index(name="count")
    ranked = totals.sort_values("count", ascending=False)
    return ranked.head(n).reset_index(drop=True)
46
+
47
+
48
def country_growth_ranking(df: pd.DataFrame, n: int = 20,
                           early=(2005, 2012), recent=(2018, 2025),
                           min_early_events: int = 10) -> pd.DataFrame:
    """Rank countries by flood event growth rate between two periods.

    Filters to countries with at least min_early_events in the early period
    to avoid divide-by-zero / noise from tiny samples.
    """
    def _period_counts(bounds, label):
        # Per-country totals over an inclusive year window.
        window = df[df["year"].between(bounds[0], bounds[1])]
        return window.groupby("country").size().reset_index(name=label)

    # Inner merge: only countries present in BOTH periods are ranked.
    ranked = _period_counts(early, "early_count").merge(
        _period_counts(recent, "recent_count"), on="country", how="inner"
    )
    ranked = ranked[ranked["early_count"] >= min_early_events]

    span_early = early[1] - early[0] + 1
    span_recent = recent[1] - recent[0] + 1
    ranked["early_avg"] = ranked["early_count"] / span_early
    ranked["recent_avg"] = ranked["recent_count"] / span_recent
    ranked["growth_factor"] = ranked["recent_avg"] / ranked["early_avg"]

    return ranked.sort_values("growth_factor", ascending=False).head(n).reset_index(drop=True)
75
+
76
+
77
def bias_check(df: pd.DataFrame) -> pd.DataFrame:
    """Return global yearly event counts for bias analysis.

    The returned DataFrame has columns: year, count, cumulative_growth.
    Users should plot this and compare against digital news growth curves.
    A hockey-stick shape matching internet/news growth indicates
    reporting bias, not necessarily increasing flood frequency.
    """
    yearly = df.groupby("year").size().reset_index(name="count")

    # Growth relative to the earliest year on record.
    first_count = yearly.loc[yearly["year"].idxmin(), "count"]
    if first_count > 0:
        yearly["cumulative_growth"] = yearly["count"] / first_count
    else:
        yearly["cumulative_growth"] = 0

    return yearly
92
+
93
+
94
def compare_countries(df: pd.DataFrame, countries: list) -> pd.DataFrame:
    """Side-by-side yearly counts for multiple countries.

    Returns DataFrame with columns: year, country1, country2, ...
    (or None when *countries* is empty).
    """
    merged = None
    for name in countries:
        per_year = yearly_counts(df, group_col="country", group_val=name)
        per_year = per_year.rename(columns={"count": name})
        # Outer merge so each country keeps its own set of active years.
        merged = per_year if merged is None else merged.merge(per_year, on="year", how="outer")

    if merged is not None:
        merged = merged.sort_values("year").fillna(0)
        # fillna produces floats; restore integer counts.
        for name in countries:
            merged[name] = merged[name].astype(int)

    return merged
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.4
2
+ Name: groundsource
3
+ Version: 0.1.0
4
+ Summary: Python package for Google's Groundsource flash flood dataset — 2.6M events, 150+ countries, 2000–2026
5
+ Author: Shara
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/sharanry/groundsource
8
+ Project-URL: Repository, https://github.com/sharanry/groundsource
9
+ Project-URL: Issues, https://github.com/sharanry/groundsource/issues
10
+ Keywords: flood,flash-flood,climate,groundsource,google,geospatial,dataset,gemini,natural-disaster,news-mining
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: GIS
21
+ Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: pandas>=1.5
26
+ Requires-Dist: pyarrow>=10.0
27
+ Requires-Dist: geopandas>=0.13
28
+ Requires-Dist: shapely>=2.0
29
+ Requires-Dist: matplotlib>=3.6
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=7.0; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ # groundsource
35
+
36
+ **Python package for Google's Groundsource flash flood dataset.**
37
+
38
+ Google used Gemini to extract 2.6 million flash flood events from news articles across 150+ countries (2000-2026). The raw data is a 667MB Parquet file with undocumented WKB geometries and no location labels. This package decodes the geometries, tags every event with country and continent, and provides a clean search and analysis API.
39
+
40
+ ```python
41
+ from groundsource import FloodDB
42
+
43
+ db = FloodDB() # auto-downloads + enriches on first run
44
+ floods = db.search(country="India", year_range=(2020, 2025))
45
+ ```
46
+
47
+ ## Installation
48
+
49
+ ```bash
50
+ pip install groundsource
51
+ ```
52
+
53
+ **Requirements:** Python 3.9+, pandas, pyarrow, geopandas, shapely, matplotlib
54
+
55
+ On first run, the package downloads the dataset from Zenodo (~667MB), decodes 2.6M WKB polygons, and performs a spatial join against Natural Earth boundaries. This takes 2-3 minutes and is cached locally for instant subsequent loads.
56
+
57
+ ## Usage
58
+
59
+ ### Search
60
+
61
+ ```python
62
+ from groundsource import FloodDB
63
+ db = FloodDB()
64
+
65
+ # By country (supports common aliases: "USA", "UK", "UAE", etc.)
66
+ db.search(country="India")
67
+ db.search(country="USA", year_range=(2020, 2025))
68
+
69
+ # By city (98 major cities built-in, default 100km radius)
70
+ db.search(city="Houston", radius_km=50)
71
+
72
+ # By continent or bounding box
73
+ db.search(continent="Asia")
74
+ db.search(bbox=[0, 95, 25, 120]) # [min_lat, min_lon, max_lat, max_lon]
75
+ ```
76
+
77
+ ### Trend Analysis
78
+
79
+ ```python
80
+ db.trend(country="India") # yearly event counts
81
+ db.growth(country="India") # growth rate between two periods
82
+ db.compare(["USA", "UK", "India", "Indonesia"]) # side-by-side comparison
83
+ db.top_countries(20) # ranked by total events
84
+ db.country_growth_ranking(20) # ranked by growth acceleration
85
+ db.bias_check() # global yearly counts for bias analysis
86
+ ```
87
+
88
+ ### Built-in Charts
89
+
90
+ ```python
91
+ db.plot_hockey_stick(save_path="hockey_stick.png")
92
+ db.plot_bias(save_path="bias.png")
93
+ db.plot_top_countries(save_path="top_countries.png")
94
+ db.plot_country_growth(save_path="growth.png")
95
+ ```
96
+
97
+ ### Raw DataFrame Access
98
+
99
+ ```python
100
+ df = db.to_dataframe()
101
+ # Columns: uuid, area_km2, start_date, end_date, centroid_lon, centroid_lat,
102
+ # country, iso_a3, continent, year
103
+ ```
104
+
105
+ ## What This Package Does
106
+
107
+ The raw Parquet from Zenodo has 5 columns with no documentation:
108
+
109
+ | Raw Column | Type | Issue |
110
+ |-----------|------|-------|
111
+ | `uuid` | string | ID only |
112
+ | `area_km2` | float | Usable as-is |
113
+ | `geometry` | WKB binary | Requires `shapely` to decode |
114
+ | `start_date` | string | Not parsed as datetime |
115
+ | `end_date` | string | Not parsed as datetime |
116
+
117
+ This package enriches each event with:
118
+
119
+ | Added Column | Source |
120
+ |-------------|--------|
121
+ | `centroid_lon`, `centroid_lat` | Decoded from WKB polygons |
122
+ | `country`, `iso_a3` | Spatial join against Natural Earth |
123
+ | `continent` | Natural Earth |
124
+ | `year` | Extracted from `start_date` |
125
+
126
+ ## Reporting Bias
127
+
128
+ The dataset shows 498 events in 2000 and 402,012 in 2024. This does not mean floods increased 807x. The data is extracted from news articles, and digital news coverage grew dramatically over this period. Any trend analysis should account for this reporting bias. Use `db.bias_check()` and `db.plot_bias()` to visualize this.
129
+
130
+ ![Bias Analysis](charts/02_bias_normalized.png)
131
+
132
+ ## Top Countries by Events Detected
133
+
134
+ ![Top Countries](charts/04_top_countries.png)
135
+
136
+ ## Dataset
137
+
138
+ - **Source:** [Google Groundsource](https://research.google/blog/introducing-groundsource-turning-news-reports-into-data-with-gemini/)
139
+ - **Download:** [Zenodo](https://zenodo.org/records/18647054) (CC BY 4.0)
140
+ - **Records:** 2,646,302 events across 175 countries, 2000-2026
141
+ - **Method:** Gemini parsed ~5M news articles
142
+ - **Accuracy:** 60% location+timing, 82% practically useful (per Google)
143
+
144
+ ## License
145
+
146
+ MIT. The underlying dataset is licensed CC BY 4.0 by Google.
147
+
148
+ ## Citation
149
+
150
+ > Google Research. *Groundsource: Turning News Reports into Data with Gemini.* Zenodo, 2026. DOI: [10.5281/zenodo.18647054](https://zenodo.org/records/18647054)
@@ -0,0 +1,17 @@
1
+ groundsource/__init__.py,sha256=inQIRcjV65ACtxIrYFrP5o8Mi5YpuV2ce6e96i983_c,167
2
+ groundsource/cache.py,sha256=tbvKJSHBGrERVri92WZYGyVczya-i2vqBWwQF2C05LQ,2082
3
+ groundsource/charts.py,sha256=0g8KhGwi7YnQHSVvZvJxxcpWWT-P_DvlwZDsFJjOcPg,8084
4
+ groundsource/db.py,sha256=8QWs4Ld-hzGJpALWhCG-BTK_XNfHHVlI3qve09GLuec,13644
5
+ groundsource/spatial.py,sha256=QfjCUi2ANavv2hyoEhX2TQAL9tckyiVAoCuDgi5Whr4,4591
6
+ groundsource/trends.py,sha256=8olS2F0epwZE1KWDLyb7MUdlNgeXvsso6yz2jkqjGDY,4260
7
+ groundsource/data/__init__.py,sha256=fMw6NvHh0R15QoK3Nvsn6UxML3TEmfqc38TWzQDpED4,81
8
+ groundsource/data/ne_110m_admin_0_countries.cpg,sha256=OtMDH1UDpEBK-CUmLugjLMBNTqZoPULF3QovKiesmCQ,5
9
+ groundsource/data/ne_110m_admin_0_countries.dbf,sha256=H-5nfNTgOzZ4duA4YesQGX5AIqhGv5IGDgMTQyhjeFs,531808
10
+ groundsource/data/ne_110m_admin_0_countries.prj,sha256=Mlnw5VKQqCsTUGRvYE6KfuHiE2wDIKQPrYOKtAgZ__g,147
11
+ groundsource/data/ne_110m_admin_0_countries.shp,sha256=CONBYG6DkeRYw_CN6zEt5mS1a_rjdgZMWqCu5mgaX1U,180924
12
+ groundsource/data/ne_110m_admin_0_countries.shx,sha256=iwvirZfdSEruXC68mGl9U3LoMriuWKNaZhru9rmFZo0,1516
13
+ groundsource-0.1.0.dist-info/licenses/LICENSE,sha256=J_TEk_POk6Le3dSCQUYuAbyVwpqGP-4RTjT6cQMF16k,1078
14
+ groundsource-0.1.0.dist-info/METADATA,sha256=bYwQCMtjuaE9q17dvIeqRZ3ACZCyXW7NSRkkibUP5C4,5680
15
+ groundsource-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
16
+ groundsource-0.1.0.dist-info/top_level.txt,sha256=x8jiZoCtnyZzmlaeNj9l_KeNusbMFG6ZFYTDcPoH1Bk,13
17
+ groundsource-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sharath Sivamalaisamy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ groundsource