giga-spatial 0.6.3__tar.gz → 0.6.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/.env_sample +3 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/CHANGELOG.md +118 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/PKG-INFO +2 -1
- giga_spatial-0.6.5/docs/changelog.md +1 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/getting-started/quickstart.md +10 -17
- giga_spatial-0.6.5/docs/user-guide/configuration.md +130 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/giga_spatial.egg-info/PKG-INFO +2 -1
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/giga_spatial.egg-info/SOURCES.txt +3 -0
- giga_spatial-0.6.3/requirements.txt → giga_spatial-0.6.5/giga_spatial.egg-info/requires.txt +2 -1
- giga_spatial-0.6.5/gigaspatial/__init__.py +1 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/config.py +35 -4
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/io/__init__.py +1 -0
- giga_spatial-0.6.5/gigaspatial/core/io/database.py +316 -0
- giga_spatial-0.6.5/gigaspatial/generators/__init__.py +6 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/generators/poi.py +228 -43
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/generators/zonal/__init__.py +2 -1
- giga_spatial-0.6.5/gigaspatial/generators/zonal/admin.py +84 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/generators/zonal/base.py +221 -64
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/generators/zonal/geometry.py +74 -31
- giga_spatial-0.6.5/gigaspatial/generators/zonal/mercator.py +109 -0
- giga_spatial-0.6.5/gigaspatial/grid/__init__.py +1 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/grid/mercator_tiles.py +33 -10
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/__init__.py +5 -1
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/boundaries.py +226 -48
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/ghsl.py +79 -14
- giga_spatial-0.6.5/gigaspatial/handlers/giga.py +786 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/hdx.py +50 -51
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/maxar_image.py +1 -2
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/rwi.py +5 -2
- giga_spatial-0.6.5/gigaspatial/processing/algorithms.py +188 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/processing/geo.py +87 -25
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/processing/tif_processor.py +220 -45
- giga_spatial-0.6.3/giga_spatial.egg-info/requires.txt → giga_spatial-0.6.5/requirements.txt +1 -0
- giga_spatial-0.6.3/docs/changelog.md +0 -21
- giga_spatial-0.6.3/docs/user-guide/configuration.md +0 -76
- giga_spatial-0.6.3/gigaspatial/__init__.py +0 -1
- giga_spatial-0.6.3/gigaspatial/generators/__init__.py +0 -2
- giga_spatial-0.6.3/gigaspatial/generators/zonal/mercator.py +0 -78
- giga_spatial-0.6.3/gigaspatial/grid/__init__.py +0 -1
- giga_spatial-0.6.3/gigaspatial/handlers/giga.py +0 -145
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/CODE_OF_CONDUCT.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/CONTRIBUTING.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/LICENSE +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/MANIFEST.in +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/PULL_REQUEST_TEMPLATE.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/README.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/.DS_Store +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/api/core.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/api/generators.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/api/grid.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/api/handlers.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/api/index.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/api/processing.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/assets/GIGA_horizontal_notext_white.webp +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/assets/datasets.png +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/assets/logo.png +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/contributing.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/examples/advanced.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/examples/basic.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/examples/downloading/ghsl.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/examples/downloading/osm.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/examples/index.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/examples/processing/tif.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/examples/use-cases.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/getting-started/installation.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/index.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/license.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/stylesheets/extra.css +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/docs/user-guide/index.md +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/giga_spatial.egg-info/dependency_links.txt +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/giga_spatial.egg-info/top_level.txt +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/__init__.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/io/adls_data_store.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/io/data_api.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/io/data_store.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/io/local_data_store.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/io/readers.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/io/writers.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/schemas/__init__.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/core/schemas/entity.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/base.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/google_open_buildings.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/mapbox_image.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/microsoft_global_buildings.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/ookla_speedtest.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/opencellid.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/osm.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/overture.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/unicef_georepo.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/handlers/worldpop.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/processing/__init__.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/processing/sat_images.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/gigaspatial/processing/utils.py +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/pyproject.toml +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/setup.cfg +0 -0
- {giga_spatial-0.6.3 → giga_spatial-0.6.5}/setup.py +0 -0
@@ -4,6 +4,7 @@ export GOOGLE_SERVICE_ACCOUNT=""
|
|
4
4
|
export API_PROFILE_FILE_PATH=""
|
5
5
|
export API_SHARE_NAME=""
|
6
6
|
export API_SCHEMA_NAME=""
|
7
|
+
export DB_CONFIG='{}'
|
7
8
|
export MAPBOX_ACCESS_TOKEN=""
|
8
9
|
export MAXAR_USERNAME=""
|
9
10
|
export MAXAR_PASSWORD=""
|
@@ -12,6 +13,8 @@ export OPENCELLID_ACCESS_TOKEN=""
|
|
12
13
|
export GEOREPO_API_KEY=""
|
13
14
|
export GEOREPO_USER_EMAIL=""
|
14
15
|
export GIGA_SCHOOL_LOCATION_API_KEY=""
|
16
|
+
export GIGA_SCHOOL_PROFILE_API_KEY=""
|
17
|
+
export GIGA_SCHOOL_MEASUREMENTS_API_KEY=""
|
15
18
|
export ROOT_DATA_DIR=""
|
16
19
|
export BRONZE_DIR=""
|
17
20
|
export SILVER_DIR=""
|
@@ -2,6 +2,124 @@
|
|
2
2
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
4
4
|
|
5
|
+
## [v0.6.5] - 2025-07-01
|
6
|
+
|
7
|
+
### Added
|
8
|
+
|
9
|
+
- **`MercatorTiles.get_quadkeys_from_points()`**
|
10
|
+
New static method for efficient 1:1 point-to-quadkey mapping using coordinate-based logic, improving performance over spatial joins.
|
11
|
+
|
12
|
+
- **`AdminBoundariesViewGenerator`**
|
13
|
+
New generator class for producing zonal views based on administrative boundaries (e.g., districts, provinces) with flexible source and admin level support.
|
14
|
+
|
15
|
+
- **Zonal View Generator Enhancements**
|
16
|
+
- `_view`: Internal attribute for accumulating mapped statistics.
|
17
|
+
- `view`: Exposes current state of zonal view.
|
18
|
+
- `add_variable_to_view()`: Adds mapped data from `map_points`, `map_polygons`, or `map_rasters` with robust validation and zone alignment.
|
19
|
+
- `to_dataframe()` and `to_geodataframe()` methods added for exporting current view in tabular or spatial formats.
|
20
|
+
|
21
|
+
- **`PoiViewGenerator` Enhancements**
|
22
|
+
- Consistent `_view` DataFrame for storing mapped results.
|
23
|
+
- `_update_view()`: Central method to update POI data.
|
24
|
+
- `save_view()`: Improved format handling (CSV, Parquet, GeoJSON, etc.) with geometry recovery.
|
25
|
+
- `to_dataframe()` and `to_geodataframe()` methods added for convenient export of enriched POI view.
|
26
|
+
- Robust duplicate ID detection and CRS validation in `map_zonal_stats`.
|
27
|
+
|
28
|
+
- **`TifProcessor` Enhancements**
|
29
|
+
- `sample_by_polygons_batched()`: Parallel polygon sampling.
|
30
|
+
- Enhanced `sample_by_polygons()` with nodata masking and multiple stats.
|
31
|
+
- `warn_on_error`: Flag to suppress sampling warnings.
|
32
|
+
|
33
|
+
- **GeoTIFF Multi-Band Support**
|
34
|
+
- `multi` mode added for multi-band raster support.
|
35
|
+
- Auto-detects band names via metadata.
|
36
|
+
- Strict validation of band count based on mode (`single`, `rgb`, `rgba`, `multi`).
|
37
|
+
|
38
|
+
- **Spatial Distance Graph Algorithm**
|
39
|
+
- `build_distance_graph()` added for fast KD-tree-based spatial matching.
|
40
|
+
- Supports both `DataFrame` and `GeoDataFrame` inputs.
|
41
|
+
- Outputs a `networkx.Graph` with optional DataFrame of matches.
|
42
|
+
- Handles projections, self-match exclusion, and includes verbose stats/logs.
|
43
|
+
|
44
|
+
- **Database Integration (Experimental)**
|
45
|
+
- Added `DBConnection` class in `core/io/database.py` for unified Trino and PostgreSQL access.
|
46
|
+
- Supports schema/table introspection, query execution, and reading into `pandas` or `dask`.
|
47
|
+
- Handles connection creation, credential management, and diagnostics.
|
48
|
+
- Utility methods for schema/view/table/column listings and parameterized queries.
|
49
|
+
|
50
|
+
- **GHSL Population Mapping**
|
51
|
+
- `map_ghsl_pop()` method added to `GeometryBasedZonalViewGenerator`.
|
52
|
+
- Aggregates GHSL population rasters to user-defined zones.
|
53
|
+
- Supports `intersects` and `fractional` predicates (latter for 1000m resolution only).
|
54
|
+
- Returns population statistics (e.g., `sum`) with customizable column prefix.
|
55
|
+
|
56
|
+
### Changed
|
57
|
+
|
58
|
+
- **`MercatorTiles.from_points()`** now internally uses `get_quadkeys_from_points()` for better performance.
|
59
|
+
|
60
|
+
- **`map_points()` and `map_rasters()`** now return `Dict[zone_id, value]` to support direct usage with `add_variable_to_view()`.
|
61
|
+
|
62
|
+
- **Refactored `aggregate_polygons_to_zones()`**
|
63
|
+
- `area_weighted` deprecated in favor of `predicate`.
|
64
|
+
- Supports flexible predicates such as `"within"` and `"fractional"` for spatial aggregation.
|
65
|
+
- `map_polygons()` updated to reflect this change.
|
66
|
+
|
67
|
+
- **Optional Admin Boundaries Configuration**
|
68
|
+
- `ADMIN_BOUNDARIES_DATA_DIR` is now optional.
|
69
|
+
- `AdminBoundaries.create()` only attempts to load if explicitly configured or path is provided.
|
70
|
+
- Improved documentation and fallback behavior for missing configs.
|
71
|
+
|
72
|
+
### Fixed
|
73
|
+
|
74
|
+
- **GHSL Downloader**
|
75
|
+
- ZIP files are now downloaded into a temporary cache directory using `requests.get()`.
|
76
|
+
- Avoids unnecessary writes and ensures cleanup.
|
77
|
+
|
78
|
+
- **`TifProcessor`**
|
79
|
+
- Removed polygon sampling warnings unless explicitly enabled.
|
80
|
+
|
81
|
+
### Deprecated
|
82
|
+
|
83
|
+
- `TifProcessor.tabular` → use `to_dataframe()` instead.
|
84
|
+
- `TifProcessor.get_zoned_geodataframe()` → use `to_geodataframe()` instead.
|
85
|
+
- `area_weighted` → use `predicate` in aggregation methods instead.
|
86
|
+
|
87
|
+
## [v0.6.4] - 2025-06-19
|
88
|
+
|
89
|
+
### Added
|
90
|
+
|
91
|
+
- **GigaSchoolProfileFetcher**
|
92
|
+
- New class to fetch and process school profile data from the Giga School Profile API
|
93
|
+
- Supports paginated fetching, filtering by country and school ID
|
94
|
+
- Includes methods to generate connectivity summary statistics by region, connection type, and source
|
95
|
+
|
96
|
+
- **GigaSchoolMeasurementsFetcher**
|
97
|
+
- New class to fetch and process daily real-time connectivity measurements from the Giga API
|
98
|
+
- Supports filtering by date range and school
|
99
|
+
- Includes performance summary generation (download/upload speeds, latency, quality flags)
|
100
|
+
|
101
|
+
- **AdminBoundaries.from_geoboundaries**
|
102
|
+
- New class method to download and process geoBoundaries data by country and admin level
|
103
|
+
- Automatically handles HDX dataset discovery, downloading, and fallback logic
|
104
|
+
|
105
|
+
- **HDXConfig.search_datasets**
|
106
|
+
- Static method to search HDX datasets without full handler initialization
|
107
|
+
- Supports query string, sort order, result count, HDX site selection, and custom user agent
|
108
|
+
|
109
|
+
### Fixed
|
110
|
+
|
111
|
+
- Typo in `MaxarImageDownloader` causing runtime error
|
112
|
+
|
113
|
+
### Documentation
|
114
|
+
|
115
|
+
- **Improved Configuration Guide** (`docs/user-guide/configuration.md`)
|
116
|
+
- Added comprehensive table of environment variables with defaults and descriptions
|
117
|
+
- Synced `.env_sample` and `config.py` with docs
|
118
|
+
- Example `.env` file and guidance on path overrides using `config.set_path`
|
119
|
+
- New section on `config.ensure_directories_exist` and troubleshooting tips
|
120
|
+
- Clearer handling of credentials and security notes
|
121
|
+
- Improved formatting and structure for clarity
|
122
|
+
|
5
123
|
## [v0.6.3] - 2025-06-16
|
6
124
|
|
7
125
|
### Added
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: giga-spatial
|
3
|
-
Version: 0.6.3
|
3
|
+
Version: 0.6.5
|
4
4
|
Summary: A package for spatial data download & processing
|
5
5
|
Home-page: https://github.com/unicef/giga-spatial
|
6
6
|
Author: Utku Can Ozturk
|
@@ -31,6 +31,7 @@ Requires-Dist: OWSLib==0.32.1
|
|
31
31
|
Requires-Dist: pydantic-settings>=2.7.1
|
32
32
|
Requires-Dist: hdx-python-api>=6.3.8
|
33
33
|
Requires-Dist: bs4==0.0.2
|
34
|
+
Requires-Dist: sqlalchemy-trino==0.5.0
|
34
35
|
Dynamic: author
|
35
36
|
Dynamic: author-email
|
36
37
|
Dynamic: classifier
|
@@ -0,0 +1 @@
|
|
1
|
+
--8<-- "CHANGELOG.md"
|
@@ -16,43 +16,36 @@ import gigaspatial as gs
|
|
16
16
|
|
17
17
|
## Setting Up Configuration
|
18
18
|
|
19
|
-
The `gigaspatial` package uses a configuration
|
19
|
+
The `gigaspatial` package uses a unified configuration system to manage paths, API keys, and other settings.
|
20
20
|
|
21
|
-
|
21
|
+
- **Environment Variables:** Most configuration is handled via environment variables, which can be set in a `.env` file at the project root. For a full list of supported variables and their descriptions, see the [Configuration Guide](../user-guide/configuration.md).
|
22
|
+
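- **Defaults:** If a variable is not set, its default is used: data directories fall back to the defaults listed in the Configuration Guide, API keys default to empty strings, and `ADMIN_BOUNDARIES_DIR` is left unset.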
|
23
|
+
- **Manual Overrides:** You can override data directory paths in your code using `config.set_path`.
|
22
24
|
|
23
|
-
|
25
|
+
### Example `.env` File
|
24
26
|
|
25
27
|
```bash
|
26
|
-
# Paths for different data types
|
27
28
|
BRONZE_DIR=/path/to/your/bronze_tier_data
|
28
29
|
SILVER_DIR=/path/to/your/silver_tier_data
|
29
30
|
GOLD_DIR=/path/to/your/gold_tier_data
|
30
31
|
VIEWS_DIR=/path/to/your/views_data
|
31
|
-
|
32
|
-
|
33
|
-
# API keys and tokens
|
32
|
+
CACHE_DIR=/path/to/your/cache
|
33
|
+
ADMIN_BOUNDARIES_DIR=/path/to/your/admin_boundaries
|
34
34
|
MAPBOX_ACCESS_TOKEN=your_mapbox_token_here
|
35
|
-
|
36
|
-
MAXAR_PASSWORD=your_maxar_password_here
|
37
|
-
MAXAR_CONNECTION_STRING=your_maxar_key_here
|
35
|
+
# ... other keys ...
|
38
36
|
```
|
39
37
|
|
40
|
-
|
41
|
-
|
42
|
-
### Setting Paths Manually
|
43
|
-
|
44
|
-
You can also set paths manually in your code:
|
38
|
+
### Setting Paths Programmatically
|
45
39
|
|
46
40
|
```python
|
47
41
|
from gigaspatial.config import config
|
48
42
|
|
49
|
-
# Example: Setting custom data storage paths
|
50
43
|
config.set_path("bronze", "/path/to/your/bronze_tier_data")
|
51
44
|
config.set_path("gold", "/path/to/your/gold_tier_data")
|
52
45
|
config.set_path("views", "/path/to/your/views_data")
|
53
46
|
```
|
54
47
|
|
55
|
-
|
48
|
+
> For more details and troubleshooting, see the [full configuration guide](../user-guide/configuration.md).
|
56
49
|
|
57
50
|
## Downloading and Processing Geospatial Data
|
58
51
|
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# Configuration
|
2
|
+
|
3
|
+
The `gigaspatial` package uses a unified configuration system, managed by the `config.py` file, to handle paths, API keys, and other settings. This guide explains how to configure the package for your environment.
|
4
|
+
|
5
|
+
---
|
6
|
+
|
7
|
+
## Environment Variables Overview
|
8
|
+
|
9
|
+
Configuration is primarily managed via environment variables, which can be set in a `.env` file at the project root. Below is a table of all supported environment variables, their defaults, and descriptions:
|
10
|
+
|
11
|
+
| Variable | Default | Description |
|
12
|
+
|----------------------------------|-----------------|--------------------------------------------------|
|
13
|
+
| ADLS_CONNECTION_STRING | "" | Azure Data Lake connection string |
|
14
|
+
| ADLS_CONTAINER_NAME | "" | Azure Data Lake container name |
|
15
|
+
| GOOGLE_SERVICE_ACCOUNT | "" | Google service account credentials |
|
16
|
+
| API_PROFILE_FILE_PATH | profile.share | Path to API profile file |
|
17
|
+
| API_SHARE_NAME | "" | API share name |
|
18
|
+
| API_SCHEMA_NAME | "" | API schema name |
|
19
|
+
| MAPBOX_ACCESS_TOKEN | "" | Mapbox API access token |
|
20
|
+
| MAXAR_USERNAME | "" | Maxar API username |
|
21
|
+
| MAXAR_PASSWORD | "" | Maxar API password |
|
22
|
+
| MAXAR_CONNECTION_STRING | "" | Maxar API connection string/key |
|
23
|
+
| OPENCELLID_ACCESS_TOKEN | "" | OpenCellID API access token |
|
24
|
+
| GEOREPO_API_KEY | "" | UNICEF GeoRepo API key |
|
25
|
+
| GEOREPO_USER_EMAIL | "" | UNICEF GeoRepo user email |
|
26
|
+
| GIGA_SCHOOL_LOCATION_API_KEY | "" | GIGA School Location API key |
|
27
|
+
| GIGA_SCHOOL_PROFILE_API_KEY | "" | GIGA School Profile API key |
|
28
|
+
| GIGA_SCHOOL_MEASUREMENTS_API_KEY | "" | GIGA School Measurements API key |
|
29
|
+
| ROOT_DATA_DIR | . | Root directory for all data tiers |
|
30
|
+
| BRONZE_DIR | bronze | Directory for raw/bronze tier data |
|
31
|
+
| SILVER_DIR | silver | Directory for processed/silver tier data |
|
32
|
+
| GOLD_DIR | gold | Directory for final/gold tier data |
|
33
|
+
| VIEWS_DIR | views | Directory for views data |
|
34
|
+
| CACHE_DIR | cache | Directory for cache/temp files |
|
35
|
+
| ADMIN_BOUNDARIES_DIR | None (unset) | Directory for admin boundary data (optional) |
|
36
|
+
|
37
|
+
> **Tip:** You can copy `.env_sample` to `.env` and fill in your values.
|
38
|
+
|
39
|
+
---
|
40
|
+
|
41
|
+
## Example `.env` File
|
42
|
+
|
43
|
+
```bash
|
44
|
+
# Data directories
|
45
|
+
BRONZE_DIR=/path/to/your/bronze_tier_data
|
46
|
+
SILVER_DIR=/path/to/your/silver_tier_data
|
47
|
+
GOLD_DIR=/path/to/your/gold_tier_data
|
48
|
+
VIEWS_DIR=/path/to/your/views_data
|
49
|
+
CACHE_DIR=/path/to/your/cache
|
50
|
+
ADMIN_BOUNDARIES_DIR=/path/to/your/admin_boundaries
|
51
|
+
|
52
|
+
# API keys and credentials
|
53
|
+
MAPBOX_ACCESS_TOKEN=your_mapbox_token_here
|
54
|
+
MAXAR_USERNAME=your_maxar_username_here
|
55
|
+
MAXAR_PASSWORD=your_maxar_password_here
|
56
|
+
MAXAR_CONNECTION_STRING=your_maxar_key_here
|
57
|
+
# ... other keys ...
|
58
|
+
```
|
59
|
+
|
60
|
+
---
|
61
|
+
|
62
|
+
## How Configuration is Loaded
|
63
|
+
|
64
|
+
- The `config.py` file uses [pydantic-settings](https://docs.pydantic.dev/latest/concepts/pydantic_settings/) to load environment variables from `.env` (if present) or the system environment.
|
65
|
+
- All directory paths are resolved as `Path` objects. If a path is relative, it is resolved relative to the current working directory.
|
66
|
+
- Defaults are used if environment variables are not set.
|
67
|
+
|
68
|
+
---
|
69
|
+
|
70
|
+
## Setting Paths and Keys Programmatically
|
71
|
+
|
72
|
+
You can override directory paths in your code using the `set_path` method:
|
73
|
+
|
74
|
+
```python
|
75
|
+
from gigaspatial.config import config
|
76
|
+
|
77
|
+
# Set custom data storage paths
|
78
|
+
config.set_path("bronze", "/path/to/your/bronze_tier_data")
|
79
|
+
config.set_path("gold", "/path/to/your/gold_tier_data")
|
80
|
+
config.set_path("views", "/path/to/your/views_data")
|
81
|
+
```
|
82
|
+
|
83
|
+
> **Note:** API keys and credentials should be set via environment variables for security.
|
84
|
+
|
85
|
+
---
|
86
|
+
|
87
|
+
## Ensuring Directories Exist
|
88
|
+
|
89
|
+
To ensure all configured directories exist (and optionally create them if missing):
|
90
|
+
|
91
|
+
```python
|
92
|
+
from gigaspatial.config import config
|
93
|
+
|
94
|
+
# Raise error if any directory does not exist
|
95
|
+
config.ensure_directories_exist(create=False)
|
96
|
+
|
97
|
+
# Or, create missing directories automatically
|
98
|
+
config.ensure_directories_exist(create=True)
|
99
|
+
```
|
100
|
+
|
101
|
+
---
|
102
|
+
|
103
|
+
## Verifying the Configuration
|
104
|
+
|
105
|
+
You can print the current configuration for debugging:
|
106
|
+
|
107
|
+
```python
|
108
|
+
from gigaspatial.config import config
|
109
|
+
print(config)
|
110
|
+
```
|
111
|
+
|
112
|
+
---
|
113
|
+
|
114
|
+
## Troubleshooting
|
115
|
+
|
116
|
+
- **.env File Location:** Ensure `.env` is in your project root.
|
117
|
+
- **Absolute Paths:** Use absolute paths for directories to avoid confusion.
|
118
|
+
- **Environment Variable Precedence:** Values in `.env` override the built-in defaults, and system environment variables in turn override values in `.env`.
|
119
|
+
- **Missing Directories:** Use `config.ensure_directories_exist(create=True)` to create missing directories.
|
120
|
+
- **API Keys:** Double-check that all required API keys are set for the services you use.
|
121
|
+
|
122
|
+
---
|
123
|
+
|
124
|
+
## Next Steps
|
125
|
+
|
126
|
+
Once configuration is set up, proceed to the [Data Handling Guide](data-handling/downloading.md) *(Coming Soon)* to start using `gigaspatial`.
|
127
|
+
|
128
|
+
---
|
129
|
+
|
130
|
+
[Back to User Guide](../index.md)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: giga-spatial
|
3
|
-
Version: 0.6.3
|
3
|
+
Version: 0.6.5
|
4
4
|
Summary: A package for spatial data download & processing
|
5
5
|
Home-page: https://github.com/unicef/giga-spatial
|
6
6
|
Author: Utku Can Ozturk
|
@@ -31,6 +31,7 @@ Requires-Dist: OWSLib==0.32.1
|
|
31
31
|
Requires-Dist: pydantic-settings>=2.7.1
|
32
32
|
Requires-Dist: hdx-python-api>=6.3.8
|
33
33
|
Requires-Dist: bs4==0.0.2
|
34
|
+
Requires-Dist: sqlalchemy-trino==0.5.0
|
34
35
|
Dynamic: author
|
35
36
|
Dynamic: author-email
|
36
37
|
Dynamic: classifier
|
@@ -47,6 +47,7 @@ gigaspatial/core/io/__init__.py
|
|
47
47
|
gigaspatial/core/io/adls_data_store.py
|
48
48
|
gigaspatial/core/io/data_api.py
|
49
49
|
gigaspatial/core/io/data_store.py
|
50
|
+
gigaspatial/core/io/database.py
|
50
51
|
gigaspatial/core/io/local_data_store.py
|
51
52
|
gigaspatial/core/io/readers.py
|
52
53
|
gigaspatial/core/io/writers.py
|
@@ -55,6 +56,7 @@ gigaspatial/core/schemas/entity.py
|
|
55
56
|
gigaspatial/generators/__init__.py
|
56
57
|
gigaspatial/generators/poi.py
|
57
58
|
gigaspatial/generators/zonal/__init__.py
|
59
|
+
gigaspatial/generators/zonal/admin.py
|
58
60
|
gigaspatial/generators/zonal/base.py
|
59
61
|
gigaspatial/generators/zonal/geometry.py
|
60
62
|
gigaspatial/generators/zonal/mercator.py
|
@@ -78,6 +80,7 @@ gigaspatial/handlers/rwi.py
|
|
78
80
|
gigaspatial/handlers/unicef_georepo.py
|
79
81
|
gigaspatial/handlers/worldpop.py
|
80
82
|
gigaspatial/processing/__init__.py
|
83
|
+
gigaspatial/processing/algorithms.py
|
81
84
|
gigaspatial/processing/geo.py
|
82
85
|
gigaspatial/processing/sat_images.py
|
83
86
|
gigaspatial/processing/tif_processor.py
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.6.5"
|
@@ -32,6 +32,12 @@ class Config(BaseSettings):
|
|
32
32
|
GIGA_SCHOOL_LOCATION_API_KEY: str = Field(
|
33
33
|
default="", alias="GIGA_SCHOOL_LOCATION_API_KEY"
|
34
34
|
)
|
35
|
+
GIGA_SCHOOL_PROFILE_API_KEY: str = Field(
|
36
|
+
default="", alias="GIGA_SCHOOL_PROFILE_API_KEY"
|
37
|
+
)
|
38
|
+
GIGA_SCHOOL_MEASUREMENTS_API_KEY: str = Field(
|
39
|
+
default="", alias="GIGA_SCHOOL_MEASUREMENTS_API_KEY"
|
40
|
+
)
|
35
41
|
|
36
42
|
ROOT_DATA_DIR: Path = Field(
|
37
43
|
default=Path("."),
|
@@ -64,11 +70,12 @@ class Config(BaseSettings):
|
|
64
70
|
description="Directory for temporary/cache files",
|
65
71
|
alias="CACHE_DIR",
|
66
72
|
)
|
67
|
-
ADMIN_BOUNDARIES_DATA_DIR: Path = Field(
|
68
|
-
default=
|
73
|
+
ADMIN_BOUNDARIES_DATA_DIR: Optional[Path] = Field(
|
74
|
+
default=None,
|
69
75
|
description="Root directory for administrative boundary data",
|
70
76
|
alias="ADMIN_BOUNDARIES_DIR",
|
71
77
|
)
|
78
|
+
DB_CONFIG: Optional[Dict] = Field(default=None, alias="DB_CONFIG")
|
72
79
|
|
73
80
|
DATA_TYPES: Dict[str, str] = Field(
|
74
81
|
default={
|
@@ -150,6 +157,11 @@ class Config(BaseSettings):
|
|
150
157
|
) -> Path:
|
151
158
|
"""Dynamic path construction for administrative boundary data based on admin level."""
|
152
159
|
base_dir = getattr(self, "ADMIN_BOUNDARIES_DATA_DIR")
|
160
|
+
if base_dir is None:
|
161
|
+
raise ValueError(
|
162
|
+
"ADMIN_BOUNDARIES_DATA_DIR is not configured. "
|
163
|
+
"Please set the ADMIN_BOUNDARIES_DIR environment variable."
|
164
|
+
)
|
153
165
|
level_dir = f"admin{admin_level}"
|
154
166
|
file = f"{country_code}_{level_dir}{file_suffix}"
|
155
167
|
|
@@ -168,7 +180,6 @@ class Config(BaseSettings):
|
|
168
180
|
"SILVER_DATA_DIR",
|
169
181
|
"GOLD_DATA_DIR",
|
170
182
|
"CACHE_DIR",
|
171
|
-
"ADMIN_BOUNDARIES_DATA_DIR",
|
172
183
|
mode="before",
|
173
184
|
)
|
174
185
|
def resolve_and_validate_paths(
|
@@ -186,10 +197,30 @@ class Config(BaseSettings):
|
|
186
197
|
resolved = path.expanduser().resolve()
|
187
198
|
return resolved if resolve else path
|
188
199
|
|
200
|
+
@field_validator("ADMIN_BOUNDARIES_DATA_DIR", mode="before")
|
201
|
+
def validate_admin_boundaries_dir(
|
202
|
+
cls, value: Union[str, Path, None]
|
203
|
+
) -> Optional[Path]:
|
204
|
+
"""Validator for ADMIN_BOUNDARIES_DATA_DIR that handles None and string values."""
|
205
|
+
if value is None:
|
206
|
+
return None
|
207
|
+
if isinstance(value, str):
|
208
|
+
return Path(value)
|
209
|
+
elif isinstance(value, Path):
|
210
|
+
return value
|
211
|
+
else:
|
212
|
+
raise ValueError(
|
213
|
+
f"Invalid path type for ADMIN_BOUNDARIES_DATA_DIR: {type(value)}"
|
214
|
+
)
|
215
|
+
|
189
216
|
def ensure_directories_exist(self, create: bool = False) -> None:
|
190
217
|
"""Ensures all configured directories exist."""
|
191
218
|
for field_name, field_value in self.__dict__.items():
|
192
|
-
if
|
219
|
+
if (
|
220
|
+
isinstance(field_value, Path)
|
221
|
+
and field_value is not None
|
222
|
+
and not field_value.exists()
|
223
|
+
):
|
193
224
|
if create:
|
194
225
|
field_value.mkdir(parents=True, exist_ok=True)
|
195
226
|
else:
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from gigaspatial.core.io.adls_data_store import ADLSDataStore
|
2
2
|
from gigaspatial.core.io.local_data_store import LocalDataStore
|
3
3
|
from gigaspatial.core.io.data_api import GigaDataAPI
|
4
|
+
from gigaspatial.core.io.database import DBConnection
|
4
5
|
from gigaspatial.core.io.readers import *
|
5
6
|
from gigaspatial.core.io.writers import *
|