giga-spatial 0.6.9__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/CHANGELOG.md +54 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/PKG-INFO +30 -4
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/README.md +8 -0
- giga_spatial-0.7.0/docs/examples/processing/tif.md +146 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/giga_spatial.egg-info/PKG-INFO +30 -4
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/giga_spatial.egg-info/SOURCES.txt +1 -0
- giga_spatial-0.6.9/requirements.txt → giga_spatial-0.7.0/giga_spatial.egg-info/requires.txt +4 -2
- giga_spatial-0.7.0/gigaspatial/__init__.py +1 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/io/adls_data_store.py +104 -11
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/io/local_data_store.py +8 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/geometry.py +12 -5
- giga_spatial-0.7.0/gigaspatial/grid/h3.py +417 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/grid/mercator_tiles.py +1 -1
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/processing/geo.py +10 -5
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/processing/tif_processor.py +380 -224
- giga_spatial-0.6.9/giga_spatial.egg-info/requires.txt → giga_spatial-0.7.0/requirements.txt +3 -1
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/setup.py +19 -1
- giga_spatial-0.6.9/docs/examples/processing/tif.md +0 -33
- giga_spatial-0.6.9/gigaspatial/__init__.py +0 -1
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/.env_sample +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/CODE_OF_CONDUCT.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/CONTRIBUTING.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/LICENSE +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/MANIFEST.in +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/PULL_REQUEST_TEMPLATE.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/.DS_Store +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/api/core.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/api/generators.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/api/grid.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/api/handlers.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/api/index.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/api/processing.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/assets/GIGA_horizontal_notext_white.webp +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/assets/datasets.png +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/assets/logo.png +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/changelog.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/contributing.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/examples/advanced.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/examples/basic.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/examples/downloading/ghsl.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/examples/downloading/osm.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/examples/index.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/examples/use-cases.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/getting-started/installation.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/getting-started/quickstart.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/index.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/license.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/stylesheets/extra.css +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/user-guide/configuration.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/docs/user-guide/index.md +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/giga_spatial.egg-info/dependency_links.txt +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/giga_spatial.egg-info/top_level.txt +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/config.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/__init__.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/io/__init__.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/io/data_api.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/io/data_store.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/io/database.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/io/readers.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/io/writers.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/schemas/__init__.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/core/schemas/entity.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/generators/__init__.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/generators/poi.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/__init__.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/admin.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/base.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/mercator.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/grid/__init__.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/__init__.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/base.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/boundaries.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/ghsl.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/giga.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/google_open_buildings.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/hdx.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/mapbox_image.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/maxar_image.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/microsoft_global_buildings.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/ookla_speedtest.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/opencellid.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/osm.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/overture.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/rwi.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/unicef_georepo.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/handlers/worldpop.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/processing/__init__.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/processing/algorithms.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/processing/sat_images.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/gigaspatial/processing/utils.py +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/pyproject.toml +0 -0
- {giga_spatial-0.6.9 → giga_spatial-0.7.0}/setup.cfg +0 -0
@@ -2,6 +2,60 @@
|
|
2
2
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
4
4
|
|
5
|
+
## [v0.7.0] - 2025-09-17
|
6
|
+
|
7
|
+
### Added
|
8
|
+
|
9
|
+
- **TifProcessor Revamp**
|
10
|
+
- **Explicit Reprojection Method:** Introduced `reproject_to()` method, allowing on-demand reprojection of rasters to a new CRS with customizable `resampling_method` and `resolution`.
|
11
|
+
- **Reprojection Resolution Control:** Added `reprojection_resolution` parameter to `TifProcessor` for precise control over output pixel size during reprojection.
|
12
|
+
- **Advanced Raster Information:** Added `get_raster_info()` method to retrieve a comprehensive dictionary of raster metadata.
|
13
|
+
- **Graph Conversion Capabilities:** Implemented `to_graph()` method to convert raster data into a graph (NetworkX or sparse matrix) based on pixel adjacency (4- or 8-connectivity).
|
14
|
+
- **Internal Refactoring: `_reproject_to_temp_file`:** Introduced `_reproject_to_temp_file` as a helper for reprojection into temporary files.
|
15
|
+
|
16
|
+
- **H3 Grid Generation**
|
17
|
+
- **H3 Grid Generation Module (`gigaspatial/grid/h3.py`):**
|
18
|
+
- Introduced `H3Hexagons` class for managing H3 cell IDs.
|
19
|
+
- Supports creation from lists of hexagons, geographic bounds, spatial geometries, or points.
|
20
|
+
- Provides methods to convert H3 hexagons to pandas DataFrames and GeoPandas GeoDataFrames.
|
21
|
+
- Includes functionality for filtering, getting k-ring neighbors, compacting hexagons, and getting children/parents at different resolutions.
|
22
|
+
- Allows saving H3Hexagons to JSON, Parquet, or GeoJSON files.
|
23
|
+
- **Country-Specific H3 Hexagons (`CountryH3Hexagons`):**
|
24
|
+
- Extends `H3Hexagons` for generating H3 grids constrained by country boundaries.
|
25
|
+
- Integrates with `AdminBoundaries` to fetch country geometries for precise H3 cell generation.
|
26
|
+
|
27
|
+
- **Documentation**
|
28
|
+
- Improved `tif.md` example to showcase multi-raster initialization, explicit reprojection, and graph conversion.
|
29
|
+
|
30
|
+
### Changed
|
31
|
+
|
32
|
+
- **TifProcessor**
|
33
|
+
- **Improved Temporary File Management:** Refactored temporary file handling for merging and reprojection to use `tempfile.mkdtemp()` and `shutil.rmtree` for more robust and reliable cleanup. Cleanup is now integrated with the context manager protocol (`__enter__`, `__exit__`), and a dedicated `cleanup()` method has been added.
|
34
|
+
- **Reprojection during Initialization:** Implemented automatic reprojection of single rasters to a specified `target_crs` during `TifProcessor` initialization.
|
35
|
+
- **Enhanced `open_dataset` Context Manager:** The `open_dataset` context manager now intelligently opens the most up-to-date (merged or reprojected) version of the dataset.
|
36
|
+
- **More Flexible Multi-Dataset Validation:** Modified `_validate_multiple_datasets` to issue a warning instead of raising an error for CRS mismatches when `target_crs` is not set.
|
37
|
+
- **Optimized `_get_reprojection_profile`:** Now dynamically calculates the transform and dimensions based on `reprojection_resolution`, and applies LZW compression to reprojected TIFF files to reduce file size.
|
38
|
+
|
39
|
+
- **ADLSDataStore Enhancements**
|
40
|
+
- **New `copy_file` method:** Implemented a new method for copying individual files within ADLS, with an option to overwrite existing files.
|
41
|
+
- **New `rename` method:** Added a new method to rename (move) files in ADLS, which internally uses `copy_file` and then deletes the source, with options for overwrite, waiting for copy completion, and polling.
|
42
|
+
- **Revamped `rmdir` method:** Modified `rmdir` to perform batch deletions of blobs, addressing the Azure Blob batch delete limit (256 sub-requests) and improving efficiency for large directories.
|
43
|
+
|
44
|
+
- **LocalDataStore Enhancements**
|
45
|
+
- **New `copy_file` method:** Implemented a new method for copying individual files.
|
46
|
+
|
47
|
+
### Removed
|
48
|
+
|
49
|
+
- Removed deprecated `tabular` property and `get_zoned_geodataframe` method from `TifProcessor`. Users should now use `to_dataframe()` and `to_geodataframe()` respectively.
|
50
|
+
|
51
|
+
### Dependencies
|
52
|
+
|
53
|
+
- Added `networkx` and `h3` as new dependencies.
|
54
|
+
|
55
|
+
### Fixed
|
56
|
+
|
57
|
+
- Several small fixes and improvements to aggregation methods.
|
58
|
+
|
5
59
|
## [v0.6.9] - 2025-07-26
|
6
60
|
|
7
61
|
### Fixed
|
@@ -1,13 +1,29 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: giga-spatial
|
3
|
-
Version: 0.6.9
|
3
|
+
Version: 0.7.0
|
4
4
|
Summary: A package for spatial data download & processing
|
5
|
-
Home-page: https://github.com/unicef/giga-spatial
|
6
5
|
Author: Utku Can Ozturk
|
7
6
|
Author-email: utkucanozturk@gmail.com
|
8
7
|
License: AGPL-3.0-or-later
|
8
|
+
Project-URL: Homepage, https://github.com/unicef/giga-spatial
|
9
|
+
Project-URL: Documentation, https://unicef.github.io/giga-spatial/
|
10
|
+
Project-URL: Source, https://github.com/unicef/giga-spatial
|
11
|
+
Project-URL: Issue Tracker, https://github.com/unicef/giga-spatial/issues
|
12
|
+
Project-URL: Discussions, https://github.com/unicef/giga-spatial/discussions
|
13
|
+
Project-URL: Changelog, https://unicef.github.io/giga-spatial/changelog
|
9
14
|
Keywords: gigaspatial,spatial,geospatial,gis,remote sensing,data processing,download,openstreetmap,osm,ghsl,grid,point of interest,POI,raster,vector,school connectivity,unicef,giga,mapping,analysis,python
|
15
|
+
Classifier: Development Status :: 5 - Production/Stable
|
16
|
+
Classifier: Intended Audience :: Developers
|
17
|
+
Classifier: Intended Audience :: Education
|
18
|
+
Classifier: Intended Audience :: Healthcare Industry
|
19
|
+
Classifier: Intended Audience :: Science/Research
|
20
|
+
Classifier: Intended Audience :: Telecommunications Industry
|
21
|
+
Classifier: Programming Language :: Python
|
10
22
|
Classifier: Programming Language :: Python :: 3
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
26
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
11
27
|
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
12
28
|
Classifier: Operating System :: OS Independent
|
13
29
|
Requires-Python: >=3.10
|
@@ -24,8 +40,9 @@ Requires-Dist: pycountry==24.6.1
|
|
24
40
|
Requires-Dist: pydantic>=2.10.6
|
25
41
|
Requires-Dist: rasterio==1.3.10
|
26
42
|
Requires-Dist: Requests==2.32.3
|
27
|
-
Requires-Dist: scipy
|
43
|
+
Requires-Dist: scipy>=1.15.1
|
28
44
|
Requires-Dist: Shapely>=2.0.7
|
45
|
+
Requires-Dist: networkx>=3.2.1
|
29
46
|
Requires-Dist: tqdm==4.65.0
|
30
47
|
Requires-Dist: OWSLib==0.32.1
|
31
48
|
Requires-Dist: pydantic-settings>=2.7.1
|
@@ -33,15 +50,16 @@ Requires-Dist: hdx-python-api>=6.3.8
|
|
33
50
|
Requires-Dist: bs4==0.0.2
|
34
51
|
Requires-Dist: sqlalchemy-trino==0.5.0
|
35
52
|
Requires-Dist: dask>=2024.12.1
|
53
|
+
Requires-Dist: h3>=4.2.0
|
36
54
|
Dynamic: author
|
37
55
|
Dynamic: author-email
|
38
56
|
Dynamic: classifier
|
39
57
|
Dynamic: description
|
40
58
|
Dynamic: description-content-type
|
41
|
-
Dynamic: home-page
|
42
59
|
Dynamic: keywords
|
43
60
|
Dynamic: license
|
44
61
|
Dynamic: license-file
|
62
|
+
Dynamic: project-url
|
45
63
|
Dynamic: requires-dist
|
46
64
|
Dynamic: requires-python
|
47
65
|
Dynamic: summary
|
@@ -53,6 +71,14 @@ Dynamic: summary
|
|
53
71
|
|
54
72
|
# GigaSpatial
|
55
73
|
|
74
|
+
[](https://opensource.org/license/agpl-v3)
|
75
|
+
[](https://badge.fury.io/py/giga-spatial)
|
76
|
+
[](https://pypi.org/project/giga-spatial/)
|
77
|
+
[](https://github.com/psf/black)
|
78
|
+
[](https://pepy.tech/projects/giga-spatial)
|
79
|
+
[](https://github.com/unicef/giga-spatial/graphs/contributors)
|
80
|
+
|
81
|
+
|
56
82
|
## About Giga
|
57
83
|
|
58
84
|
[Giga](https://giga.global/) is a UNICEF-ITU initiative to connect every school to the Internet and every young person to information, opportunity and choice.
|
@@ -5,6 +5,14 @@
|
|
5
5
|
|
6
6
|
# GigaSpatial
|
7
7
|
|
8
|
+
[](https://opensource.org/license/agpl-v3)
|
9
|
+
[](https://badge.fury.io/py/giga-spatial)
|
10
|
+
[](https://pypi.org/project/giga-spatial/)
|
11
|
+
[](https://github.com/psf/black)
|
12
|
+
[](https://pepy.tech/projects/giga-spatial)
|
13
|
+
[](https://github.com/unicef/giga-spatial/graphs/contributors)
|
14
|
+
|
15
|
+
|
8
16
|
## About Giga
|
9
17
|
|
10
18
|
[Giga](https://giga.global/) is a UNICEF-ITU initiative to connect every school to the Internet and every young person to information, opportunity and choice.
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# Processing Raster Files
|
2
|
+
|
3
|
+
This example demonstrates how to process raster files using the `TifProcessor` class.
|
4
|
+
|
5
|
+
## Prerequisites
|
6
|
+
|
7
|
+
Ensure you have installed the `gigaspatial` package and set up the necessary configuration. Follow the [Installation Guide](../../getting-started/installation.md) if you haven’t already.
|
8
|
+
|
9
|
+
## Example Code
|
10
|
+
|
11
|
+
```python
|
12
|
+
from gigaspatial.processing import TifProcessor
|
13
|
+
from gigaspatial.core.io import LocalDataStore
|
14
|
+
from rasterio.warp import Resampling # For reprojection methods
|
15
|
+
|
16
|
+
# NOTE: For these examples, replace the "/path/to/..." placeholders with actual paths to your GeoTIFF files.
|
17
|
+
# You might need to create dummy files or use existing ones for local testing.
|
18
|
+
|
19
|
+
# 1. Initialize with a single TIFF file
|
20
|
+
print("--- Single TIFF File Processing ---")
|
21
|
+
single_processor = TifProcessor(
|
22
|
+
"/path/to/single_band.tif",
|
23
|
+
mode="single" # Can be "rgb", "rgba", "multi"
|
24
|
+
)
|
25
|
+
df_single = single_processor.to_dataframe()
|
26
|
+
print("Single-band DataFrame head:")
|
27
|
+
print(df_single.head())
|
28
|
+
print("Raster Info for single_processor:")
|
29
|
+
print(single_processor.get_raster_info())
|
30
|
+
|
31
|
+
|
32
|
+
# 2. Initialize with multiple TIFF files for merging and reprojection
|
33
|
+
print("\n--- Multi-raster Merging and Reprojection ---")
|
34
|
+
# Replace with actual paths to your tif files. Ensure they are compatible for merging.
|
35
|
+
# Example: two adjacent tiles from a dataset.
|
36
|
+
tif_paths = [
|
37
|
+
"/path/to/raster1.tif",
|
38
|
+
"/path/to/raster2.tif"
|
39
|
+
]
|
40
|
+
merged_reprojected_processor = TifProcessor(
|
41
|
+
dataset_path=tif_paths,
|
42
|
+
mode="single", # Or "multi", "rgb", "rgba" depending on your data
|
43
|
+
merge_method="mean", # Options: "first", "last", "min", "max", "mean"
|
44
|
+
target_crs="EPSG:4326", # Reproject all rasters to WGS84 during initialization
|
45
|
+
)
|
46
|
+
df_merged = merged_reprojected_processor.to_dataframe()
|
47
|
+
print("Merged and Reprojected DataFrame head:")
|
48
|
+
print(df_merged.head())
|
49
|
+
print("Raster Info for merged_reprojected_processor:")
|
50
|
+
print(merged_reprojected_processor.get_raster_info())
|
51
|
+
|
52
|
+
# 3. Explicit Reprojection after initialization
|
53
|
+
print("\n--- Explicit Reprojection ---")
|
54
|
+
# Reproject the current raster (e.g., the merged one) to a different CRS or resolution
|
55
|
+
# In a real scenario, you'd save this to a persistent location.
|
56
|
+
reprojected_output_path = "./temp_reprojected_raster.tif"
|
57
|
+
reprojected_path = merged_reprojected_processor.reproject_to(
|
58
|
+
target_crs="EPSG:3857", # Web Mercator
|
59
|
+
output_path=reprojected_output_path,
|
60
|
+
resampling_method=Resampling.bilinear # Different resampling method
|
61
|
+
)
|
62
|
+
print(f"Raster reprojected to: {reprojected_path}")
|
63
|
+
|
64
|
+
# 4. Convert raster to a graph (NetworkX example)
|
65
|
+
print("\n--- Raster to Graph Conversion ---")
|
66
|
+
# Assuming '/path/to/single_band.tif' is a suitable single-band raster
|
67
|
+
graph_processor = TifProcessor(
|
68
|
+
"/path/to/single_band.tif",
|
69
|
+
mode="single" # Graph conversion typically for single-band data
|
70
|
+
)
|
71
|
+
graph = graph_processor.to_graph(
|
72
|
+
connectivity=8, # 4-connectivity (von Neumann) or 8-connectivity (Moore)
|
73
|
+
include_coordinates=True, # Include 'x' and 'y' coordinates as node attributes
|
74
|
+
graph_type="networkx" # Or "sparse" for scipy.sparse.csr_matrix
|
75
|
+
)
|
76
|
+
print(f"Generated a NetworkX graph with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges.")
|
77
|
+
# Example: Access node attributes (first 5 nodes)
|
78
|
+
# for node_id, data in list(graph.nodes(data=True))[:5]:
|
79
|
+
# print(f"Node {node_id}: Value={data['value']:.2f}, X={data.get('x'):.2f}, Y={data.get('y'):.2f}")
|
80
|
+
```
|
81
|
+
|
82
|
+
## Explanation
|
83
|
+
|
84
|
+
The `TifProcessor` class provides robust functionalities for handling GeoTIFF files, from single-band to multi-band (RGB, RGBA) datasets, with advanced processing capabilities including:
|
85
|
+
|
86
|
+
- **Initialization**:
|
87
|
+
- Can be initialized with a single GeoTIFF file path.
|
88
|
+
- Supports a list of GeoTIFF file paths for **automatic merging** during initialization, configured via `merge_method` (`first`, `last`, `min`, `max`, `mean`).
|
89
|
+
- The `mode` parameter (`single`, `rgb`, `rgba`, `multi`) dictates how bands are interpreted and validated.
|
90
|
+
- `target_crs` and `reprojection_resolution` can be set during initialization to reproject rasters immediately to a consistent CRS and pixel size.
|
91
|
+
- **Data Extraction**:
|
92
|
+
- `to_dataframe()`: Converts raster data into a pandas DataFrame, with columns for longitude, latitude, and pixel values (or band-specific values for multi-band modes).
|
93
|
+
- `to_geodataframe()`: Extends `to_dataframe()` by adding a `geometry` column that holds each pixel's bounding box, returning a GeoDataFrame with the correct CRS.
|
94
|
+
- **Reprojection (`reproject_to`)**:
|
95
|
+
- Allows explicit reprojection of the current raster to a new Coordinate Reference System (CRS) and/or resolution, saving the output to a specified path or a temporary file.
|
96
|
+
- Supports different `resampling_method` options (e.g., `Resampling.nearest`, `Resampling.bilinear`).
|
97
|
+
- **Raster Information (`get_raster_info`)**:
|
98
|
+
- Provides a dictionary containing comprehensive metadata about the raster, such as band count, dimensions, CRS, bounds, transform, data types, nodata values, processing mode, and merge status.
|
99
|
+
- **Graph Conversion (`to_graph`)**:
|
100
|
+
- Converts raster data into a graph (NetworkX graph or SciPy sparse matrix) based on pixel adjacency.
|
101
|
+
- Supports `connectivity` of 4 (von Neumann neighborhood) or 8 (Moore neighborhood).
|
102
|
+
- Can include geographic coordinates and pixel values as node attributes.
|
103
|
+
- **Sampling**:
|
104
|
+
- `sample_by_coordinates()`: Extracts pixel values at specific geographic coordinates.
|
105
|
+
- `sample_by_polygons()`: Computes aggregate statistics (e.g., mean, sum, min, max, count) of pixel values within given polygon boundaries, supporting single or multiple statistics.
|
106
|
+
- `sample_by_polygons_batched()`: Provides a parallelized version of polygon sampling for performance-intensive tasks.
|
107
|
+
|
108
|
+
---
|
109
|
+
|
110
|
+
# Multi-raster reprojection
|
111
|
+
|
112
|
+
When multiple rasters are combined, the metadata of the final reprojected raster depends on the order of operations: **reproject then merge** versus **merge then reproject**. Differences between the two results are expected, because the two processes follow different steps, leading to variations in the final raster's dimensions, bounds, and resolution.
|
113
|
+
|
114
|
+
---
|
115
|
+
|
116
|
+
### **Reproject then Merge**
|
117
|
+
|
118
|
+
When you specify `target_crs` at initialization, the code first **reprojects each individual raster** to the target CRS (`EPSG:4326`) and then **merges the reprojected outputs**.
|
119
|
+
|
120
|
+
- **Step 1: Reprojection**: Each input raster is reprojected from `ESRI:54009` to `EPSG:4326`. During this step, `rasterio`'s `calculate_default_transform` function computes a new transform and pixel dimensions (`width`, `height`) for each raster. The reprojected rasters are now in the same CRS with a consistent resolution (e.g., `0.00918...` degrees).
|
121
|
+
- **Step 2: Merging**: The reprojected rasters, which are now in the same CRS and have similar resolutions, are merged. The `rasterio.merge.merge` function can combine these aligned rasters seamlessly. The final output's dimensions are calculated by finding the union of all reprojected rasters' bounds and applying the shared resolution, resulting in a single, larger raster.
|
122
|
+
|
123
|
+
This process ensures a uniform resolution and grid alignment across all parts of the final merged raster.
|
124
|
+
|
125
|
+
---
|
126
|
+
|
127
|
+
### **Merge then Reproject**
|
128
|
+
|
129
|
+
When `target_crs` is not specified at initialization, the code first **merges the two rasters** in their original `ESRI:54009` CRS and then **reprojects the single, merged output** to `EPSG:4326`.
|
130
|
+
|
131
|
+
- **Step 1: Merging**: The two rasters are merged in `ESRI:54009`. Since they are in the same CRS and have the same resolution (`1000.0` meters), `rasterio.merge.merge` can simply combine them side-by-side. The original raster was `1000x1000`, so merging a second one next to it likely creates a `2000x1000` raster, as seen in the metadata. The resolution remains `1000.0` meters.
|
132
|
+
- **Step 2: Reprojection**: The single `2000x1000` raster is then reprojected to `EPSG:4326`. A new transform and pixel dimensions are calculated for this single, larger raster. Since `calculate_default_transform` is working on a different-shaped input, it will calculate a different output resolution and grid shape. The resulting resolution (`0.00973...`) and dimensions (`2076x832`) will be different because the reprojection is performed on a single, larger input rather than two smaller ones.
|
133
|
+
|
134
|
+
---
|
135
|
+
|
136
|
+
### **Why the Metadata is Different**
|
137
|
+
|
138
|
+
- **Resolution**: The `reproject-then-merge` approach maintains a consistent resolution that is calculated for a single tile and then applied to all. The `merge-then-reproject` approach calculates a single resolution for the entire, larger combined area. The process of resampling to a new grid (a core part of reprojection) is inherently sensitive to the input's size and shape.
|
139
|
+
- **Dimensions (`width`, `height`)**: The final pixel dimensions are a function of the total bounds and the final resolution. Since the resolution is different in the two methods, the width and height must also be different to cover the same geographic area.
|
140
|
+
- **Bounds**: The final bounds are nearly identical in latitude and longitude, which makes sense because both methods represent the same geographic area. Any slight differences are due to rounding and the nuances of resampling.
|
141
|
+
|
142
|
+
**Conclusion**: The differences are normal and reflect the non-commutative nature of these two geospatial operations. The **reproject then merge** approach is generally preferable as it ensures greater consistency and can be more accurate when dealing with rasters that have slightly different resolutions or alignments, as it creates a single, clean grid before combining the data.
|
143
|
+
|
144
|
+
---
|
145
|
+
|
146
|
+
[Back to Examples](../index.md)
|
@@ -1,13 +1,29 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: giga-spatial
|
3
|
-
Version: 0.6.9
|
3
|
+
Version: 0.7.0
|
4
4
|
Summary: A package for spatial data download & processing
|
5
|
-
Home-page: https://github.com/unicef/giga-spatial
|
6
5
|
Author: Utku Can Ozturk
|
7
6
|
Author-email: utkucanozturk@gmail.com
|
8
7
|
License: AGPL-3.0-or-later
|
8
|
+
Project-URL: Homepage, https://github.com/unicef/giga-spatial
|
9
|
+
Project-URL: Documentation, https://unicef.github.io/giga-spatial/
|
10
|
+
Project-URL: Source, https://github.com/unicef/giga-spatial
|
11
|
+
Project-URL: Issue Tracker, https://github.com/unicef/giga-spatial/issues
|
12
|
+
Project-URL: Discussions, https://github.com/unicef/giga-spatial/discussions
|
13
|
+
Project-URL: Changelog, https://unicef.github.io/giga-spatial/changelog
|
9
14
|
Keywords: gigaspatial,spatial,geospatial,gis,remote sensing,data processing,download,openstreetmap,osm,ghsl,grid,point of interest,POI,raster,vector,school connectivity,unicef,giga,mapping,analysis,python
|
15
|
+
Classifier: Development Status :: 5 - Production/Stable
|
16
|
+
Classifier: Intended Audience :: Developers
|
17
|
+
Classifier: Intended Audience :: Education
|
18
|
+
Classifier: Intended Audience :: Healthcare Industry
|
19
|
+
Classifier: Intended Audience :: Science/Research
|
20
|
+
Classifier: Intended Audience :: Telecommunications Industry
|
21
|
+
Classifier: Programming Language :: Python
|
10
22
|
Classifier: Programming Language :: Python :: 3
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
26
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
11
27
|
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
12
28
|
Classifier: Operating System :: OS Independent
|
13
29
|
Requires-Python: >=3.10
|
@@ -24,8 +40,9 @@ Requires-Dist: pycountry==24.6.1
|
|
24
40
|
Requires-Dist: pydantic>=2.10.6
|
25
41
|
Requires-Dist: rasterio==1.3.10
|
26
42
|
Requires-Dist: Requests==2.32.3
|
27
|
-
Requires-Dist: scipy
|
43
|
+
Requires-Dist: scipy>=1.15.1
|
28
44
|
Requires-Dist: Shapely>=2.0.7
|
45
|
+
Requires-Dist: networkx>=3.2.1
|
29
46
|
Requires-Dist: tqdm==4.65.0
|
30
47
|
Requires-Dist: OWSLib==0.32.1
|
31
48
|
Requires-Dist: pydantic-settings>=2.7.1
|
@@ -33,15 +50,16 @@ Requires-Dist: hdx-python-api>=6.3.8
|
|
33
50
|
Requires-Dist: bs4==0.0.2
|
34
51
|
Requires-Dist: sqlalchemy-trino==0.5.0
|
35
52
|
Requires-Dist: dask>=2024.12.1
|
53
|
+
Requires-Dist: h3>=4.2.0
|
36
54
|
Dynamic: author
|
37
55
|
Dynamic: author-email
|
38
56
|
Dynamic: classifier
|
39
57
|
Dynamic: description
|
40
58
|
Dynamic: description-content-type
|
41
|
-
Dynamic: home-page
|
42
59
|
Dynamic: keywords
|
43
60
|
Dynamic: license
|
44
61
|
Dynamic: license-file
|
62
|
+
Dynamic: project-url
|
45
63
|
Dynamic: requires-dist
|
46
64
|
Dynamic: requires-python
|
47
65
|
Dynamic: summary
|
@@ -53,6 +71,14 @@ Dynamic: summary
|
|
53
71
|
|
54
72
|
# GigaSpatial
|
55
73
|
|
74
|
+
[](https://opensource.org/license/agpl-v3)
|
75
|
+
[](https://badge.fury.io/py/giga-spatial)
|
76
|
+
[](https://pypi.org/project/giga-spatial/)
|
77
|
+
[](https://github.com/psf/black)
|
78
|
+
[](https://pepy.tech/projects/giga-spatial)
|
79
|
+
[](https://github.com/unicef/giga-spatial/graphs/contributors)
|
80
|
+
|
81
|
+
|
56
82
|
## About Giga
|
57
83
|
|
58
84
|
[Giga](https://giga.global/) is a UNICEF-ITU initiative to connect every school to the Internet and every young person to information, opportunity and choice.
|
@@ -61,6 +61,7 @@ gigaspatial/generators/zonal/base.py
|
|
61
61
|
gigaspatial/generators/zonal/geometry.py
|
62
62
|
gigaspatial/generators/zonal/mercator.py
|
63
63
|
gigaspatial/grid/__init__.py
|
64
|
+
gigaspatial/grid/h3.py
|
64
65
|
gigaspatial/grid/mercator_tiles.py
|
65
66
|
gigaspatial/handlers/__init__.py
|
66
67
|
gigaspatial/handlers/base.py
|
@@ -9,12 +9,14 @@ pycountry==24.6.1
|
|
9
9
|
pydantic>=2.10.6
|
10
10
|
rasterio==1.3.10
|
11
11
|
Requests==2.32.3
|
12
|
-
scipy
|
12
|
+
scipy>=1.15.1
|
13
13
|
Shapely>=2.0.7
|
14
|
+
networkx>=3.2.1
|
14
15
|
tqdm==4.65.0
|
15
16
|
OWSLib==0.32.1
|
16
17
|
pydantic-settings>=2.7.1
|
17
18
|
hdx-python-api>=6.3.8
|
18
19
|
bs4==0.0.2
|
19
20
|
sqlalchemy-trino==0.5.0
|
20
|
-
dask>=2024.12.1
|
21
|
+
dask>=2024.12.1
|
22
|
+
h3>=4.2.0
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.7.0"
|
@@ -1,4 +1,5 @@
|
|
1
1
|
from azure.storage.blob import BlobServiceClient
|
2
|
+
import time
|
2
3
|
import io
|
3
4
|
import contextlib
|
4
5
|
import logging
|
@@ -151,20 +152,45 @@ class ADLSDataStore(DataStore):
|
|
151
152
|
"\\", "/"
|
152
153
|
)
|
153
154
|
|
154
|
-
#
|
155
|
-
|
155
|
+
# Use copy_file method to copy each file
|
156
|
+
self.copy_file(blob.name, new_blob_path, overwrite=True)
|
156
157
|
|
157
|
-
|
158
|
-
|
159
|
-
|
158
|
+
print(f"Copied directory from {source_dir} to {destination_dir}")
|
159
|
+
except Exception as e:
|
160
|
+
print(f"Failed to copy directory {source_dir}: {e}")
|
161
|
+
|
162
|
+
def copy_file(
|
163
|
+
self, source_path: str, destination_path: str, overwrite: bool = False
|
164
|
+
):
|
165
|
+
"""
|
166
|
+
Copies a single file from source to destination within the same container.
|
167
|
+
|
168
|
+
:param source_path: The source file path in the blob storage
|
169
|
+
:param destination_path: The destination file path in the blob storage
|
170
|
+
:param overwrite: If True, overwrite the destination file if it already exists
|
171
|
+
"""
|
172
|
+
try:
|
173
|
+
if not self.file_exists(source_path):
|
174
|
+
raise FileNotFoundError(f"Source file not found: {source_path}")
|
175
|
+
|
176
|
+
if self.file_exists(destination_path) and not overwrite:
|
177
|
+
raise FileExistsError(
|
178
|
+
f"Destination file already exists and overwrite is False: {destination_path}"
|
160
179
|
)
|
161
180
|
|
162
|
-
|
163
|
-
|
181
|
+
# Create source and destination blob clients
|
182
|
+
source_blob_client = self.container_client.get_blob_client(source_path)
|
183
|
+
destination_blob_client = self.container_client.get_blob_client(
|
184
|
+
destination_path
|
185
|
+
)
|
164
186
|
|
165
|
-
|
187
|
+
# Start the server-side copy operation
|
188
|
+
destination_blob_client.start_copy_from_url(source_blob_client.url)
|
189
|
+
|
190
|
+
print(f"Copied file from {source_path} to {destination_path}")
|
166
191
|
except Exception as e:
|
167
|
-
print(f"Failed to copy
|
192
|
+
print(f"Failed to copy file {source_path}: {e}")
|
193
|
+
raise
|
168
194
|
|
169
195
|
def exists(self, path: str) -> bool:
|
170
196
|
blob_client = self.blob_service_client.get_blob_client(
|
@@ -285,8 +311,20 @@ class ADLSDataStore(DataStore):
|
|
285
311
|
return False
|
286
312
|
|
287
313
|
def rmdir(self, dir: str) -> None:
|
288
|
-
|
289
|
-
|
314
|
+
# Normalize directory path to ensure it targets all children
|
315
|
+
dir_path = dir.rstrip("/") + "/"
|
316
|
+
|
317
|
+
# Azure Blob batch delete has a hard limit on number of sub-requests
|
318
|
+
# per batch (currently 256). Delete in chunks to avoid
|
319
|
+
# ExceedsMaxBatchRequestCount errors.
|
320
|
+
blobs = list(self.list_files(dir_path))
|
321
|
+
if not blobs:
|
322
|
+
return
|
323
|
+
|
324
|
+
BATCH_LIMIT = 256
|
325
|
+
for start_idx in range(0, len(blobs), BATCH_LIMIT):
|
326
|
+
batch = blobs[start_idx : start_idx + BATCH_LIMIT]
|
327
|
+
self.container_client.delete_blobs(*batch)
|
290
328
|
|
291
329
|
def mkdir(self, path: str, exist_ok: bool = False) -> None:
|
292
330
|
"""
|
@@ -323,3 +361,58 @@ class ADLSDataStore(DataStore):
|
|
323
361
|
)
|
324
362
|
if blob_client.exists():
|
325
363
|
blob_client.delete_blob()
|
364
|
+
|
365
|
+
def rename(
|
366
|
+
self,
|
367
|
+
source_path: str,
|
368
|
+
destination_path: str,
|
369
|
+
overwrite: bool = False,
|
370
|
+
delete_source: bool = True,
|
371
|
+
wait: bool = True,
|
372
|
+
timeout_seconds: int = 300,
|
373
|
+
poll_interval_seconds: int = 1,
|
374
|
+
) -> None:
|
375
|
+
"""
|
376
|
+
Rename (move) a single file by copying to the new path and deleting the source.
|
377
|
+
|
378
|
+
:param source_path: Existing blob path
|
379
|
+
:param destination_path: Target blob path
|
380
|
+
:param overwrite: Overwrite destination if it already exists
|
381
|
+
:param delete_source: Delete original after successful copy
|
382
|
+
:param wait: Wait for the copy operation to complete
|
383
|
+
:param timeout_seconds: Max time to wait for copy to succeed
|
384
|
+
:param poll_interval_seconds: Polling interval while waiting
|
385
|
+
"""
|
386
|
+
|
387
|
+
if not self.file_exists(source_path):
|
388
|
+
raise FileNotFoundError(f"Source file not found: {source_path}")
|
389
|
+
|
390
|
+
if self.file_exists(destination_path) and not overwrite:
|
391
|
+
raise FileExistsError(
|
392
|
+
f"Destination already exists and overwrite is False: {destination_path}"
|
393
|
+
)
|
394
|
+
|
395
|
+
# Use copy_file method to copy the file
|
396
|
+
self.copy_file(source_path, destination_path, overwrite=overwrite)
|
397
|
+
|
398
|
+
if wait:
|
399
|
+
# Wait for copy to complete if requested
|
400
|
+
dest_client = self.container_client.get_blob_client(destination_path)
|
401
|
+
deadline = time.time() + timeout_seconds
|
402
|
+
while True:
|
403
|
+
props = dest_client.get_blob_properties()
|
404
|
+
status = getattr(props.copy, "status", None)
|
405
|
+
if status == "success":
|
406
|
+
break
|
407
|
+
if status in {"aborted", "failed"}:
|
408
|
+
raise IOError(
|
409
|
+
f"Copy failed with status {status} from {source_path} to {destination_path}"
|
410
|
+
)
|
411
|
+
if time.time() > deadline:
|
412
|
+
raise TimeoutError(
|
413
|
+
f"Timed out waiting for copy to complete for {destination_path}"
|
414
|
+
)
|
415
|
+
time.sleep(poll_interval_seconds)
|
416
|
+
|
417
|
+
if delete_source:
|
418
|
+
self.remove(source_path)
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from pathlib import Path
|
2
2
|
import os
|
3
|
+
import shutil
|
3
4
|
from typing import Any, List, Generator, Tuple, Union, IO
|
4
5
|
|
5
6
|
from .data_store import DataStore
|
@@ -79,6 +80,13 @@ class LocalDataStore(DataStore):
|
|
79
80
|
if full_path.is_file():
|
80
81
|
os.remove(full_path)
|
81
82
|
|
83
|
+
def copy_file(self, src: str, dst: str) -> None:
|
84
|
+
"""Copy a file from src to dst."""
|
85
|
+
src_path = self._resolve_path(src)
|
86
|
+
dst_path = self._resolve_path(dst)
|
87
|
+
self.mkdir(str(dst_path.parent), exist_ok=True)
|
88
|
+
shutil.copy2(src_path, dst_path)
|
89
|
+
|
82
90
|
def rmdir(self, directory: str) -> None:
|
83
91
|
full_path = self._resolve_path(directory)
|
84
92
|
if full_path.is_dir():
|
@@ -145,11 +145,18 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
145
145
|
gpd.GeoDataFrame: A GeoDataFrame with 'zone_id' and 'geometry' columns.
|
146
146
|
The zone_id column is renamed from the original zone_id_column if different.
|
147
147
|
"""
|
148
|
-
#
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
148
|
+
# Since _zone_gdf is already created with 'zone_id' column in the constructor,
|
149
|
+
# we just need to return a copy of it
|
150
|
+
return self._zone_gdf.copy()
|
151
|
+
|
152
|
+
@property
|
153
|
+
def zone_gdf(self) -> gpd.GeoDataFrame:
|
154
|
+
"""Override the base class zone_gdf property to ensure correct column names.
|
155
|
+
|
156
|
+
Returns:
|
157
|
+
gpd.GeoDataFrame: A GeoDataFrame with 'zone_id' and 'geometry' columns.
|
158
|
+
"""
|
159
|
+
return self._zone_gdf.copy()
|
153
160
|
|
154
161
|
def map_built_s(
|
155
162
|
self,
|