giga-spatial 0.6.3__tar.gz → 0.6.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/.env_sample +2 -0
  2. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/CHANGELOG.md +36 -0
  3. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/PKG-INFO +1 -1
  4. giga_spatial-0.6.4/docs/changelog.md +1 -0
  5. giga_spatial-0.6.4/docs/user-guide/configuration.md +130 -0
  6. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/giga_spatial.egg-info/PKG-INFO +1 -1
  7. giga_spatial-0.6.4/gigaspatial/__init__.py +1 -0
  8. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/config.py +6 -0
  9. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/__init__.py +5 -1
  10. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/boundaries.py +196 -43
  11. giga_spatial-0.6.4/gigaspatial/handlers/giga.py +786 -0
  12. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/hdx.py +50 -51
  13. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/maxar_image.py +1 -2
  14. giga_spatial-0.6.3/docs/changelog.md +0 -21
  15. giga_spatial-0.6.3/docs/user-guide/configuration.md +0 -76
  16. giga_spatial-0.6.3/gigaspatial/__init__.py +0 -1
  17. giga_spatial-0.6.3/gigaspatial/handlers/giga.py +0 -145
  18. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/CODE_OF_CONDUCT.md +0 -0
  19. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/CONTRIBUTING.md +0 -0
  20. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/LICENSE +0 -0
  21. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/MANIFEST.in +0 -0
  22. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/PULL_REQUEST_TEMPLATE.md +0 -0
  23. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/README.md +0 -0
  24. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/.DS_Store +0 -0
  25. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/api/core.md +0 -0
  26. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/api/generators.md +0 -0
  27. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/api/grid.md +0 -0
  28. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/api/handlers.md +0 -0
  29. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/api/index.md +0 -0
  30. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/api/processing.md +0 -0
  31. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/assets/GIGA_horizontal_notext_white.webp +0 -0
  32. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/assets/datasets.png +0 -0
  33. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/assets/logo.png +0 -0
  34. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/contributing.md +0 -0
  35. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/examples/advanced.md +0 -0
  36. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/examples/basic.md +0 -0
  37. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/examples/downloading/ghsl.md +0 -0
  38. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/examples/downloading/osm.md +0 -0
  39. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/examples/index.md +0 -0
  40. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/examples/processing/tif.md +0 -0
  41. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/examples/use-cases.md +0 -0
  42. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/getting-started/installation.md +0 -0
  43. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/getting-started/quickstart.md +0 -0
  44. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/index.md +0 -0
  45. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/license.md +0 -0
  46. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/stylesheets/extra.css +0 -0
  47. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/docs/user-guide/index.md +0 -0
  48. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/giga_spatial.egg-info/SOURCES.txt +0 -0
  49. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/giga_spatial.egg-info/dependency_links.txt +0 -0
  50. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/giga_spatial.egg-info/requires.txt +0 -0
  51. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/giga_spatial.egg-info/top_level.txt +0 -0
  52. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/__init__.py +0 -0
  53. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/io/__init__.py +0 -0
  54. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/io/adls_data_store.py +0 -0
  55. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/io/data_api.py +0 -0
  56. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/io/data_store.py +0 -0
  57. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/io/local_data_store.py +0 -0
  58. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/io/readers.py +0 -0
  59. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/io/writers.py +0 -0
  60. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/schemas/__init__.py +0 -0
  61. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/core/schemas/entity.py +0 -0
  62. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/generators/__init__.py +0 -0
  63. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/generators/poi.py +0 -0
  64. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/generators/zonal/__init__.py +0 -0
  65. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/generators/zonal/base.py +0 -0
  66. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/generators/zonal/geometry.py +0 -0
  67. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/generators/zonal/mercator.py +0 -0
  68. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/grid/__init__.py +0 -0
  69. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/grid/mercator_tiles.py +0 -0
  70. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/base.py +0 -0
  71. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/ghsl.py +0 -0
  72. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/google_open_buildings.py +0 -0
  73. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/mapbox_image.py +0 -0
  74. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/microsoft_global_buildings.py +0 -0
  75. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/ookla_speedtest.py +0 -0
  76. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/opencellid.py +0 -0
  77. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/osm.py +0 -0
  78. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/overture.py +0 -0
  79. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/rwi.py +0 -0
  80. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/unicef_georepo.py +0 -0
  81. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/handlers/worldpop.py +0 -0
  82. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/processing/__init__.py +0 -0
  83. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/processing/geo.py +0 -0
  84. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/processing/sat_images.py +0 -0
  85. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/processing/tif_processor.py +0 -0
  86. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/gigaspatial/processing/utils.py +0 -0
  87. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/pyproject.toml +0 -0
  88. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/requirements.txt +0 -0
  89. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/setup.cfg +0 -0
  90. {giga_spatial-0.6.3 → giga_spatial-0.6.4}/setup.py +0 -0
@@ -12,6 +12,8 @@ export OPENCELLID_ACCESS_TOKEN=""
12
12
  export GEOREPO_API_KEY=""
13
13
  export GEOREPO_USER_EMAIL=""
14
14
  export GIGA_SCHOOL_LOCATION_API_KEY=""
15
+ export GIGA_SCHOOL_PROFILE_API_KEY=""
16
+ export GIGA_SCHOOL_MEASUREMENTS_API_KEY=""
15
17
  export ROOT_DATA_DIR=""
16
18
  export BRONZE_DIR=""
17
19
  export SILVER_DIR=""
@@ -2,6 +2,42 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [v0.6.4] - 2025-06-19
6
+
7
+ ### Added
8
+
9
+ - **GigaSchoolProfileFetcher**
10
+ - New class to fetch and process school profile data from the Giga School Profile API
11
+ - Supports paginated fetching, filtering by country and school ID
12
+ - Includes methods to generate connectivity summary statistics by region, connection type, and source
13
+
14
+ - **GigaSchoolMeasurementsFetcher**
15
+ - New class to fetch and process daily real-time connectivity measurements from the Giga API
16
+ - Supports filtering by date range and school
17
+ - Includes performance summary generation (download/upload speeds, latency, quality flags)
18
+
19
+ - **AdminBoundaries.from_geoboundaries**
20
+ - New class method to download and process geoBoundaries data by country and admin level
21
+ - Automatically handles HDX dataset discovery, downloading, and fallback logic
22
+
23
+ - **HDXConfig.search_datasets**
24
+ - Static method to search HDX datasets without full handler initialization
25
+ - Supports query string, sort order, result count, HDX site selection, and custom user agent
26
+
27
+ ### Fixed
28
+
29
+ - Typo in `MaxarImageDownloader` causing runtime error
30
+
31
+ ### Documentation
32
+
33
+ - **Improved Configuration Guide** (`docs/user-guide/configuration.md`)
34
+ - Added comprehensive table of environment variables with defaults and descriptions
35
+ - Synced `.env_sample` and `config.py` with docs
36
+ - Example `.env` file and guidance on path overrides using `config.set_path`
37
+ - New section on `config.ensure_directories_exist` and troubleshooting tips
38
+ - Clearer handling of credentials and security notes
39
+ - Improved formatting and structure for clarity
40
+
5
41
  ## [v0.6.3] - 2025-06-16
6
42
 
7
43
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: giga-spatial
3
- Version: 0.6.3
3
+ Version: 0.6.4
4
4
  Summary: A package for spatial data download & processing
5
5
  Home-page: https://github.com/unicef/giga-spatial
6
6
  Author: Utku Can Ozturk
@@ -0,0 +1 @@
1
+ --8<-- "CHANGELOG.md"
@@ -0,0 +1,130 @@
1
+ # Configuration
2
+
3
+ The `gigaspatial` package uses a unified configuration system, managed by the `config.py` file, to handle paths, API keys, and other settings. This guide explains how to configure the package for your environment.
4
+
5
+ ---
6
+
7
+ ## Environment Variables Overview
8
+
9
+ Configuration is primarily managed via environment variables, which can be set in a `.env` file at the project root. Below is a table of all supported environment variables, their defaults, and descriptions:
10
+
11
+ | Variable | Default | Description |
12
+ |----------------------------------|-----------------|--------------------------------------------------|
13
+ | ADLS_CONNECTION_STRING | "" | Azure Data Lake connection string |
14
+ | ADLS_CONTAINER_NAME | "" | Azure Data Lake container name |
15
+ | GOOGLE_SERVICE_ACCOUNT | "" | Google service account credentials |
16
+ | API_PROFILE_FILE_PATH | profile.share | Path to API profile file |
17
+ | API_SHARE_NAME | "" | API share name |
18
+ | API_SCHEMA_NAME | "" | API schema name |
19
+ | MAPBOX_ACCESS_TOKEN | "" | Mapbox API access token |
20
+ | MAXAR_USERNAME | "" | Maxar API username |
21
+ | MAXAR_PASSWORD | "" | Maxar API password |
22
+ | MAXAR_CONNECTION_STRING | "" | Maxar API connection string/key |
23
+ | OPENCELLID_ACCESS_TOKEN | "" | OpenCellID API access token |
24
+ | GEOREPO_API_KEY | "" | UNICEF GeoRepo API key |
25
+ | GEOREPO_USER_EMAIL | "" | UNICEF GeoRepo user email |
26
+ | GIGA_SCHOOL_LOCATION_API_KEY | "" | GIGA School Location API key |
27
+ | GIGA_SCHOOL_PROFILE_API_KEY | "" | GIGA School Profile API key |
28
+ | GIGA_SCHOOL_MEASUREMENTS_API_KEY | "" | GIGA School Measurements API key |
29
+ | ROOT_DATA_DIR | . | Root directory for all data tiers |
30
+ | BRONZE_DIR | bronze | Directory for raw/bronze tier data |
31
+ | SILVER_DIR | silver | Directory for processed/silver tier data |
32
+ | GOLD_DIR | gold | Directory for final/gold tier data |
33
+ | VIEWS_DIR | views | Directory for views data |
34
+ | CACHE_DIR | cache | Directory for cache/temp files |
35
+ | ADMIN_BOUNDARIES_DIR | admin_boundaries| Directory for admin boundary data |
36
+
37
+ > **Tip:** You can copy `.env_sample` to `.env` and fill in your values.
38
+
39
+ ---
40
+
41
+ ## Example `.env` File
42
+
43
+ ```bash
44
+ # Data directories
45
+ BRONZE_DIR=/path/to/your/bronze_tier_data
46
+ SILVER_DIR=/path/to/your/silver_tier_data
47
+ GOLD_DIR=/path/to/your/gold_tier_data
48
+ VIEWS_DIR=/path/to/your/views_data
49
+ CACHE_DIR=/path/to/your/cache
50
+ ADMIN_BOUNDARIES_DIR=/path/to/your/admin_boundaries
51
+
52
+ # API keys and credentials
53
+ MAPBOX_ACCESS_TOKEN=your_mapbox_token_here
54
+ MAXAR_USERNAME=your_maxar_username_here
55
+ MAXAR_PASSWORD=your_maxar_password_here
56
+ MAXAR_CONNECTION_STRING=your_maxar_key_here
57
+ # ... other keys ...
58
+ ```
59
+
60
+ ---
61
+
62
+ ## How Configuration is Loaded
63
+
64
+ - The `config.py` file uses [pydantic-settings](https://docs.pydantic.dev/latest/concepts/pydantic_settings/) to load environment variables from `.env` (if present) or the system environment.
65
+ - All directory paths are resolved as `Path` objects. If a path is relative, it is resolved relative to the current working directory.
66
+ - Defaults are used if environment variables are not set.
67
+
68
+ ---
69
+
70
+ ## Setting Paths and Keys Programmatically
71
+
72
+ You can override directory paths in your code using the `set_path` method:
73
+
74
+ ```python
75
+ from gigaspatial.config import config
76
+
77
+ # Set custom data storage paths
78
+ config.set_path("bronze", "/path/to/your/bronze_tier_data")
79
+ config.set_path("gold", "/path/to/your/gold_tier_data")
80
+ config.set_path("views", "/path/to/your/views_data")
81
+ ```
82
+
83
+ > **Note:** API keys and credentials should be set via environment variables for security.
84
+
85
+ ---
86
+
87
+ ## Ensuring Directories Exist
88
+
89
+ To ensure all configured directories exist (and optionally create them if missing):
90
+
91
+ ```python
92
+ from gigaspatial.config import config
93
+
94
+ # Raise error if any directory does not exist
95
+ config.ensure_directories_exist(create=False)
96
+
97
+ # Or, create missing directories automatically
98
+ config.ensure_directories_exist(create=True)
99
+ ```
100
+
101
+ ---
102
+
103
+ ## Verifying the Configuration
104
+
105
+ You can print the current configuration for debugging:
106
+
107
+ ```python
108
+ from gigaspatial.config import config
109
+ print(config)
110
+ ```
111
+
112
+ ---
113
+
114
+ ## Troubleshooting
115
+
116
+ - **.env File Location:** Ensure `.env` is in your project root.
117
+ - **Absolute Paths:** Use absolute paths for directories to avoid confusion.
118
+ - **Environment Variable Precedence:** Values in `.env` override the built-in defaults; system environment variables, in turn, override values in `.env`.
119
+ - **Missing Directories:** Use `config.ensure_directories_exist(create=True)` to create missing directories.
120
+ - **API Keys:** Double-check that all required API keys are set for the services you use.
121
+
122
+ ---
123
+
124
+ ## Next Steps
125
+
126
+ Once configuration is set up, proceed to the [Data Handling Guide](data-handling/downloading.md) to start using `gigaspatial`.
127
+
128
+ ---
129
+
130
+ [Back to User Guide](index.md)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: giga-spatial
3
- Version: 0.6.3
3
+ Version: 0.6.4
4
4
  Summary: A package for spatial data download & processing
5
5
  Home-page: https://github.com/unicef/giga-spatial
6
6
  Author: Utku Can Ozturk
@@ -0,0 +1 @@
1
+ __version__ = "0.6.4"
@@ -32,6 +32,12 @@ class Config(BaseSettings):
32
32
  GIGA_SCHOOL_LOCATION_API_KEY: str = Field(
33
33
  default="", alias="GIGA_SCHOOL_LOCATION_API_KEY"
34
34
  )
35
+ GIGA_SCHOOL_PROFILE_API_KEY: str = Field(
36
+ default="", alias="GIGA_SCHOOL_PROFILE_API_KEY"
37
+ )
38
+ GIGA_SCHOOL_MEASUREMENTS_API_KEY: str = Field(
39
+ default="", alias="GIGA_SCHOOL_MEASUREMENTS_API_KEY"
40
+ )
35
41
 
36
42
  ROOT_DATA_DIR: Path = Field(
37
43
  default=Path("."),
@@ -37,4 +37,8 @@ from gigaspatial.handlers.unicef_georepo import (
37
37
  GeoRepoClient,
38
38
  get_country_boundaries_by_iso3,
39
39
  )
40
- from gigaspatial.handlers.giga import GigaSchoolLocationFetcher
40
+ from gigaspatial.handlers.giga import (
41
+ GigaSchoolLocationFetcher,
42
+ GigaSchoolProfileFetcher,
43
+ GigaSchoolMeasurementsFetcher,
44
+ )
@@ -4,10 +4,12 @@ import geopandas as gpd
4
4
  from pathlib import Path
5
5
  from urllib.error import HTTPError
6
6
  from shapely.geometry import Polygon, MultiPolygon, shape
7
+ import tempfile
7
8
  import pycountry
8
9
 
9
10
  from gigaspatial.core.io.data_store import DataStore
10
11
  from gigaspatial.core.io.readers import read_dataset
12
+ from gigaspatial.handlers.hdx import HDXConfig
11
13
  from gigaspatial.config import config
12
14
 
13
15
 
@@ -61,8 +63,31 @@ class AdminBoundaries(BaseModel):
61
63
  "name_en": "name_en",
62
64
  "country_code": "iso_3166_1_alpha_3",
63
65
  },
66
+ "geoBoundaries": {
67
+ "id": "shapeID",
68
+ "name": "shapeName",
69
+ "country_code": "shapeGroup",
70
+ },
64
71
  }
65
72
 
73
+ def to_geodataframe(self) -> gpd.GeoDataFrame:
74
+ """Convert the AdminBoundaries to a GeoDataFrame."""
75
+ if not self.boundaries:
76
+ if hasattr(self, "_empty_schema"):
77
+ columns = self._empty_schema
78
+ else:
79
+ columns = ["id", "name", "country_code", "geometry"]
80
+ if self.level > 0:
81
+ columns.append("parent_id")
82
+
83
+ return gpd.GeoDataFrame(columns=columns, geometry="geometry", crs=4326)
84
+
85
+ return gpd.GeoDataFrame(
86
+ [boundary.model_dump() for boundary in self.boundaries],
87
+ geometry="geometry",
88
+ crs=4326,
89
+ )
90
+
66
91
  @classmethod
67
92
  def get_schema_config(cls) -> Dict[str, Dict[str, str]]:
68
93
  """Return field mappings for different data sources"""
@@ -100,6 +125,7 @@ class AdminBoundaries(BaseModel):
100
125
  cls.logger.warning(
101
126
  f"Error loading GADM data for {country_code} at admin level {admin_level}: {str(e)}"
102
127
  )
128
+ cls.logger.info("Falling back to empty instance")
103
129
  return cls._create_empty_instance(country_code, admin_level, "gadm")
104
130
 
105
131
  @classmethod
@@ -138,6 +164,7 @@ class AdminBoundaries(BaseModel):
138
164
  cls.logger.warning(
139
165
  f"No data found at {path} for admin level {admin_level}: {str(e)}"
140
166
  )
167
+ cls.logger.info("Falling back to empty instance")
141
168
  return cls._create_empty_instance(None, admin_level, "internal")
142
169
 
143
170
  @classmethod
@@ -202,6 +229,69 @@ class AdminBoundaries(BaseModel):
202
229
 
203
230
  return cls(boundaries=boundaries, level=admin_level)
204
231
 
232
+ @classmethod
233
+ def from_geoboundaries(cls, country_code, admin_level: int = 0):
234
+ cls.logger.info(
235
+ f"Searching for geoBoundaries data for country: {country_code}, admin level: {admin_level}"
236
+ )
237
+
238
+ country_datasets = HDXConfig.search_datasets(
239
+ query=f'dataseries_name:"geoBoundaries - Subnational Administrative Boundaries" AND groups:"{country_code.lower()}"',
240
+ rows=1,
241
+ )
242
+ if not country_datasets:
243
+ cls.logger.error(f"No datasets found for country: {country_code}")
244
+ raise ValueError(
245
+ "No resources found for the specified country. Please check your search parameters and try again."
246
+ )
247
+
248
+ cls.logger.info(f"Found dataset: {country_datasets[0].get('title', 'Unknown')}")
249
+
250
+ resources = [
251
+ resource
252
+ for resource in country_datasets[0].get_resources()
253
+ if (
254
+ resource.data["name"]
255
+ == f"geoBoundaries-{country_code.upper()}-ADM{admin_level}.geojson"
256
+ )
257
+ ]
258
+
259
+ if not resources:
260
+ cls.logger.error(
261
+ f"No resources found for {country_code} at admin level {admin_level}"
262
+ )
263
+ raise ValueError(
264
+ "No resources found for the specified criteria. Please check your search parameters and try again."
265
+ )
266
+
267
+ cls.logger.info(f"Found resource: {resources[0].data.get('name', 'Unknown')}")
268
+
269
+ try:
270
+ cls.logger.info("Downloading and processing boundary data...")
271
+ with tempfile.TemporaryDirectory() as tmpdir:
272
+ url, local_path = resources[0].download(folder=tmpdir)
273
+ cls.logger.debug(f"Downloaded file to temporary path: {local_path}")
274
+ with open(local_path, "rb") as f:
275
+ gdf = gpd.read_file(f)
276
+
277
+ gdf = cls._map_fields(gdf, "geoBoundaries", admin_level)
278
+ boundaries = [
279
+ AdminBoundary(**row_dict) for row_dict in gdf.to_dict("records")
280
+ ]
281
+ cls.logger.info(
282
+ f"Successfully created {len(boundaries)} AdminBoundary objects"
283
+ )
284
+ return cls(boundaries=boundaries, level=admin_level)
285
+
286
+ except (ValueError, HTTPError, FileNotFoundError) as e:
287
+ cls.logger.warning(
288
+ f"Error loading geoBoundaries data for {country_code} at admin level {admin_level}: {str(e)}"
289
+ )
290
+ cls.logger.info("Falling back to empty instance")
291
+ return cls._create_empty_instance(
292
+ country_code, admin_level, "geoBoundaries"
293
+ )
294
+
205
295
  @classmethod
206
296
  def create(
207
297
  cls,
@@ -211,45 +301,126 @@ class AdminBoundaries(BaseModel):
211
301
  path: Optional[Union[str, "Path"]] = None,
212
302
  **kwargs,
213
303
  ) -> "AdminBoundaries":
214
- """Factory method to create AdminBoundaries instance from either GADM or data store."""
304
+ """Factory method to create an AdminBoundaries instance from a data store or, given a country code, from GeoRepo, GADM, or geoBoundaries (tried in that order).
305
+
306
+ Args:
307
+ country_code: ISO country code (2 or 3 letter) or country name
308
+ admin_level: Administrative level (0=country, 1=state/province, etc.)
309
+ data_store: Optional data store instance for loading from existing data
310
+ path: Optional path to data file (used with data_store)
311
+ **kwargs: Additional arguments passed to the underlying creation methods
312
+
313
+ Returns:
314
+ AdminBoundaries: Configured instance
315
+
316
+ Raises:
317
+ ValueError: If neither country_code nor (data_store, path) are provided,
318
+ or if country_code lookup fails
319
+
320
+ Example:
321
+ # From country code
322
+ boundaries = AdminBoundaries.create(country_code="USA", admin_level=1)
323
+
324
+ # From data store
325
+ boundaries = AdminBoundaries.create(data_store=store, path="data.shp")
326
+ """
215
327
  cls.logger.info(
216
- f"Creating AdminBoundaries instance. Country: {country_code}, admin level: {admin_level}, data_store provided: {data_store is not None}, path provided: {path is not None}"
328
+ f"Creating AdminBoundaries instance. Country: {country_code}, "
329
+ f"admin level: {admin_level}, data_store provided: {data_store is not None}, "
330
+ f"path provided: {path is not None}"
217
331
  )
218
- iso3_code = pycountry.countries.lookup(country_code).alpha_3
332
+
333
+ # Validate input parameters
334
+ if not country_code and not data_store:
335
+ raise ValueError("Either country_code or data_store must be provided.")
336
+
337
+ if data_store and not path and not country_code:
338
+ raise ValueError(
339
+ "If data_store is provided, either path or country_code must also be specified."
340
+ )
341
+
342
+ # Handle data store path first
219
343
  if data_store is not None:
220
- if path is None:
221
- if country_code is None:
222
- ValueError(
223
- "If data_store is provided, path or country_code must also be specified."
224
- )
344
+ iso3_code = None
345
+ if country_code:
346
+ try:
347
+ iso3_code = pycountry.countries.lookup(country_code).alpha_3
348
+ except LookupError as e:
349
+ raise ValueError(f"Invalid country code '{country_code}': {e}")
350
+
351
+ # Generate path if not provided
352
+ if path is None and iso3_code:
225
353
  path = config.get_admin_path(
226
354
  country_code=iso3_code,
227
355
  admin_level=admin_level,
228
356
  )
357
+
229
358
  return cls.from_data_store(data_store, path, admin_level, **kwargs)
230
- elif country_code is not None:
231
- from gigaspatial.handlers.unicef_georepo import GeoRepoClient
232
359
 
360
+ # Handle country code path
361
+ if country_code is not None:
233
362
  try:
234
- client = GeoRepoClient()
235
- if client.check_connection():
236
- cls.logger.info("GeoRepo connection successful.")
237
- return cls.from_georepo(
238
- iso3_code,
239
- admin_level=admin_level,
240
- )
241
- except ValueError as e:
363
+ iso3_code = pycountry.countries.lookup(country_code).alpha_3
364
+ except LookupError as e:
365
+ raise ValueError(f"Invalid country code '{country_code}': {e}")
366
+
367
+ # Try GeoRepo first
368
+ if cls._try_georepo(iso3_code, admin_level):
369
+ return cls.from_georepo(iso3_code, admin_level=admin_level)
370
+
371
+ # Fallback to GADM
372
+ try:
373
+ cls.logger.info("Attempting to load from GADM.")
374
+ return cls.from_gadm(iso3_code, admin_level, **kwargs)
375
+ except Exception as e:
242
376
  cls.logger.warning(
243
- f"GeoRepo initialization failed: {str(e)}. Falling back to GADM."
377
+ f"GADM loading failed: {e}. Falling back to geoBoundaries."
244
378
  )
379
+
380
+ # Final fallback to geoBoundaries
381
+ try:
382
+ return cls.from_geoboundaries(iso3_code, admin_level)
245
383
  except Exception as e:
246
- cls.logger.warning(f"GeoRepo error: {str(e)}. Falling back to GADM.")
384
+ cls.logger.error(f"All data sources failed. geoBoundaries error: {e}")
385
+ raise RuntimeError(
386
+ f"Failed to load administrative boundaries for {country_code} "
387
+ f"from all available sources (GeoRepo, GADM, geoBoundaries)."
388
+ ) from e
247
389
 
248
- return cls.from_gadm(iso3_code, admin_level, **kwargs)
249
- else:
250
- raise ValueError(
251
- "Either country_code or (data_store, path) must be provided."
252
- )
390
+ # This should never be reached due to validation above
391
+ raise ValueError("Unexpected error: no valid data source could be determined.")
392
+
393
+ @classmethod
394
+ def _try_georepo(cls, iso3_code: str, admin_level: int) -> bool:
395
+ """Helper method to test GeoRepo availability.
396
+
397
+ Args:
398
+ iso3_code: ISO3 country code
399
+ admin_level: Administrative level
400
+
401
+ Returns:
402
+ bool: True if GeoRepo is available and working, False otherwise
403
+ """
404
+ try:
405
+ from gigaspatial.handlers.unicef_georepo import GeoRepoClient
406
+
407
+ client = GeoRepoClient()
408
+ if client.check_connection():
409
+ cls.logger.info("GeoRepo connection successful.")
410
+ return True
411
+ else:
412
+ cls.logger.info("GeoRepo connection failed.")
413
+ return False
414
+
415
+ except ImportError:
416
+ cls.logger.info("GeoRepo client not available (import failed).")
417
+ return False
418
+ except ValueError as e:
419
+ cls.logger.warning(f"GeoRepo initialization failed: {e}")
420
+ return False
421
+ except Exception as e:
422
+ cls.logger.warning(f"GeoRepo error: {e}")
423
+ return False
253
424
 
254
425
  @classmethod
255
426
  def _create_empty_instance(
@@ -288,21 +459,3 @@ class AdminBoundaries(BaseModel):
288
459
  field_mapping[v] = k
289
460
 
290
461
  return gdf.rename(columns=field_mapping)
291
-
292
- def to_geodataframe(self) -> gpd.GeoDataFrame:
293
- """Convert the AdminBoundaries to a GeoDataFrame."""
294
- if not self.boundaries:
295
- if hasattr(self, "_empty_schema"):
296
- columns = self._empty_schema
297
- else:
298
- columns = ["id", "name", "country_code", "geometry"]
299
- if self.level > 0:
300
- columns.append("parent_id")
301
-
302
- return gpd.GeoDataFrame(columns=columns, geometry="geometry", crs=4326)
303
-
304
- return gpd.GeoDataFrame(
305
- [boundary.model_dump() for boundary in self.boundaries],
306
- geometry="geometry",
307
- crs=4326,
308
- )