giga-spatial 0.6.8__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/CHANGELOG.md +60 -0
  2. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/PKG-INFO +30 -4
  3. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/README.md +8 -0
  4. giga_spatial-0.7.0/docs/examples/processing/tif.md +146 -0
  5. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/giga_spatial.egg-info/PKG-INFO +30 -4
  6. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/giga_spatial.egg-info/SOURCES.txt +1 -0
  7. giga_spatial-0.6.8/requirements.txt → giga_spatial-0.7.0/giga_spatial.egg-info/requires.txt +4 -2
  8. giga_spatial-0.7.0/gigaspatial/__init__.py +1 -0
  9. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/io/adls_data_store.py +104 -11
  10. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/io/local_data_store.py +8 -0
  11. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/geometry.py +12 -5
  12. giga_spatial-0.7.0/gigaspatial/grid/h3.py +417 -0
  13. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/grid/mercator_tiles.py +1 -1
  14. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/base.py +2 -2
  15. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/processing/geo.py +10 -5
  16. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/processing/tif_processor.py +380 -224
  17. giga_spatial-0.6.8/giga_spatial.egg-info/requires.txt → giga_spatial-0.7.0/requirements.txt +3 -1
  18. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/setup.py +19 -1
  19. giga_spatial-0.6.8/docs/examples/processing/tif.md +0 -33
  20. giga_spatial-0.6.8/gigaspatial/__init__.py +0 -1
  21. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/.env_sample +0 -0
  22. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/CODE_OF_CONDUCT.md +0 -0
  23. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/CONTRIBUTING.md +0 -0
  24. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/LICENSE +0 -0
  25. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/MANIFEST.in +0 -0
  26. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/PULL_REQUEST_TEMPLATE.md +0 -0
  27. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/.DS_Store +0 -0
  28. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/api/core.md +0 -0
  29. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/api/generators.md +0 -0
  30. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/api/grid.md +0 -0
  31. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/api/handlers.md +0 -0
  32. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/api/index.md +0 -0
  33. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/api/processing.md +0 -0
  34. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/assets/GIGA_horizontal_notext_white.webp +0 -0
  35. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/assets/datasets.png +0 -0
  36. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/assets/logo.png +0 -0
  37. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/changelog.md +0 -0
  38. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/contributing.md +0 -0
  39. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/examples/advanced.md +0 -0
  40. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/examples/basic.md +0 -0
  41. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/examples/downloading/ghsl.md +0 -0
  42. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/examples/downloading/osm.md +0 -0
  43. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/examples/index.md +0 -0
  44. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/examples/use-cases.md +0 -0
  45. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/getting-started/installation.md +0 -0
  46. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/getting-started/quickstart.md +0 -0
  47. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/index.md +0 -0
  48. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/license.md +0 -0
  49. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/stylesheets/extra.css +0 -0
  50. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/user-guide/configuration.md +0 -0
  51. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/docs/user-guide/index.md +0 -0
  52. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/giga_spatial.egg-info/dependency_links.txt +0 -0
  53. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/giga_spatial.egg-info/top_level.txt +0 -0
  54. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/config.py +0 -0
  55. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/__init__.py +0 -0
  56. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/io/__init__.py +0 -0
  57. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/io/data_api.py +0 -0
  58. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/io/data_store.py +0 -0
  59. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/io/database.py +0 -0
  60. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/io/readers.py +0 -0
  61. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/io/writers.py +0 -0
  62. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/schemas/__init__.py +0 -0
  63. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/core/schemas/entity.py +0 -0
  64. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/generators/__init__.py +0 -0
  65. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/generators/poi.py +0 -0
  66. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/__init__.py +0 -0
  67. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/admin.py +0 -0
  68. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/base.py +0 -0
  69. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/generators/zonal/mercator.py +0 -0
  70. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/grid/__init__.py +0 -0
  71. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/__init__.py +0 -0
  72. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/boundaries.py +0 -0
  73. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/ghsl.py +0 -0
  74. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/giga.py +0 -0
  75. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/google_open_buildings.py +0 -0
  76. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/hdx.py +0 -0
  77. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/mapbox_image.py +0 -0
  78. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/maxar_image.py +0 -0
  79. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/microsoft_global_buildings.py +0 -0
  80. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/ookla_speedtest.py +0 -0
  81. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/opencellid.py +0 -0
  82. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/osm.py +0 -0
  83. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/overture.py +0 -0
  84. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/rwi.py +0 -0
  85. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/unicef_georepo.py +0 -0
  86. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/handlers/worldpop.py +0 -0
  87. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/processing/__init__.py +0 -0
  88. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/processing/algorithms.py +0 -0
  89. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/processing/sat_images.py +0 -0
  90. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/gigaspatial/processing/utils.py +0 -0
  91. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/pyproject.toml +0 -0
  92. {giga_spatial-0.6.8 → giga_spatial-0.7.0}/setup.cfg +0 -0
@@ -2,6 +2,66 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [v0.7.0] - 2025-09-17
6
+
7
+ ### Added
8
+
9
+ - **TifProcessor Revamp**
10
+ - **Explicit Reprojection Method:** Introduced `reproject_to()` method, allowing on-demand reprojection of rasters to a new CRS with customizable `resampling_method` and `resolution`.
11
+ - **Reprojection Resolution Control:** Added `reprojection_resolution` parameter to `TifProcessor` for precise control over output pixel size during reprojection.
12
+ - **Advanced Raster Information:** Added `get_raster_info()` method to retrieve a comprehensive dictionary of raster metadata.
13
+ - **Graph Conversion Capabilities:** Implemented `to_graph()` method to convert raster data into a graph (NetworkX or sparse matrix) based on pixel adjacency (4- or 8-connectivity).
14
+ - **Internal Refactoring: `_reproject_to_temp_file`:** Introduced `_reproject_to_temp_file` as a helper for reprojection into temporary files.
15
+
16
+ - **H3 Grid Generation**
17
+ - **H3 Grid Generation Module (`gigaspatial/grid/h3.py`):**
18
+ - Introduced `H3Hexagons` class for managing H3 cell IDs.
19
+ - Supports creation from lists of hexagons, geographic bounds, spatial geometries, or points.
20
+ - Provides methods to convert H3 hexagons to pandas DataFrames and GeoPandas GeoDataFrames.
21
+ - Includes functionalities for filtering, getting k-ring neighbors, compacting hexagons, and getting children/parents at different resolutions.
22
+ - Allows saving H3Hexagons to JSON, Parquet, or GeoJSON files.
23
+ - **Country-Specific H3 Hexagons (`CountryH3Hexagons`):**
24
+ - Extends `H3Hexagons` for generating H3 grids constrained by country boundaries.
25
+ - Integrates with `AdminBoundaries` to fetch country geometries for precise H3 cell generation.
26
+
27
+ - **Documentation**
28
+ - Improved `tif.md` example to showcase multi-raster initialization, explicit reprojection, and graph conversion.
29
+
30
+ ### Changed
31
+
32
+ - **TifProcessor**
33
+ - **Improved Temporary File Management:** Refactored temporary file handling for merging and reprojection using `tempfile.mkdtemp()` and `shutil.rmtree` for more robust and reliable cleanup. Integrated with context manager (`__enter__`, `__exit__`) and added a dedicated `cleanup()` method.
34
+ - **Reprojection during Initialization:** Implemented automatic reprojection of single rasters to a specified `target_crs` during `TifProcessor` initialization.
35
+ - **Enhanced `open_dataset` Context Manager:** The `open_dataset` context manager now intelligently opens the most up-to-date (merged or reprojected) version of the dataset.
36
+ - **More Flexible Multi-Dataset Validation:** Modified `_validate_multiple_datasets` to issue a warning instead of raising an error for CRS mismatches when `target_crs` is not set.
37
+ - **Optimized `_get_reprojection_profile`:** Now dynamically calculates the transform and dimensions based on `reprojection_resolution`, and adds LZW compression to reprojected TIFF files to reduce file size.
38
+
39
+ - **ADLSDataStore Enhancements**
40
+ - **New `copy_file` method:** Implemented a new method for copying individual files within ADLS, with an option to overwrite existing files.
41
+ - **New `rename` method:** Added a new method to rename (move) files in ADLS, which internally uses `copy_file` and then deletes the source, with options for overwrite, waiting for copy completion, and polling.
42
+ - **Revamped `rmdir` method:** Modified `rmdir` to perform batch deletions of blobs, addressing the Azure Blob batch delete limit (256 sub-requests) and improving efficiency for large directories.
43
+
44
+ - **LocalDataStore Enhancements**
45
+ - **New `copy_file` method:** Implemented a new method for copying individual files.
46
+
47
+ ### Removed
48
+
49
+ - Removed deprecated `tabular` property and `get_zoned_geodataframe` method from `TifProcessor`. Users should now use `to_dataframe()` and `to_geodataframe()` respectively.
50
+
51
+ ### Dependencies
52
+
53
+ - Added `networkx` and `h3` as new dependencies.
54
+
55
+ ### Fixed
56
+
57
+ - Several small fixes and improvements to aggregation methods.
58
+
59
+ ## [v0.6.9] - 2025-07-26
60
+
61
+ ### Fixed
62
+
63
+ - Resolved a bug in the handler base class where non-hashable types (dicts) were incorrectly used as dictionary keys in the `unit_to_path` mapping. The fix prevents potential runtime errors during data availability checks.
64
+
5
65
  ## [v0.6.8] - 2025-07-26
6
66
 
7
67
  ### Added
@@ -1,13 +1,29 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: giga-spatial
3
- Version: 0.6.8
3
+ Version: 0.7.0
4
4
  Summary: A package for spatial data download & processing
5
- Home-page: https://github.com/unicef/giga-spatial
6
5
  Author: Utku Can Ozturk
7
6
  Author-email: utkucanozturk@gmail.com
8
7
  License: AGPL-3.0-or-later
8
+ Project-URL: Homepage, https://github.com/unicef/giga-spatial
9
+ Project-URL: Documentation, https://unicef.github.io/giga-spatial/
10
+ Project-URL: Source, https://github.com/unicef/giga-spatial
11
+ Project-URL: Issue Tracker, https://github.com/unicef/giga-spatial/issues
12
+ Project-URL: Discussions, https://github.com/unicef/giga-spatial/discussions
13
+ Project-URL: Changelog, https://unicef.github.io/giga-spatial/changelog
9
14
  Keywords: gigaspatial,spatial,geospatial,gis,remote sensing,data processing,download,openstreetmap,osm,ghsl,grid,point of interest,POI,raster,vector,school connectivity,unicef,giga,mapping,analysis,python
15
+ Classifier: Development Status :: 5 - Production/Stable
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Education
18
+ Classifier: Intended Audience :: Healthcare Industry
19
+ Classifier: Intended Audience :: Science/Research
20
+ Classifier: Intended Audience :: Telecommunications Industry
21
+ Classifier: Programming Language :: Python
10
22
  Classifier: Programming Language :: Python :: 3
23
+ Classifier: Programming Language :: Python :: 3.10
24
+ Classifier: Programming Language :: Python :: 3.11
25
+ Classifier: Programming Language :: Python :: 3.12
26
+ Classifier: Topic :: Scientific/Engineering :: GIS
11
27
  Classifier: License :: OSI Approved :: GNU Affero General Public License v3
12
28
  Classifier: Operating System :: OS Independent
13
29
  Requires-Python: >=3.10
@@ -24,8 +40,9 @@ Requires-Dist: pycountry==24.6.1
24
40
  Requires-Dist: pydantic>=2.10.6
25
41
  Requires-Dist: rasterio==1.3.10
26
42
  Requires-Dist: Requests==2.32.3
27
- Requires-Dist: scipy==1.15.1
43
+ Requires-Dist: scipy>=1.15.1
28
44
  Requires-Dist: Shapely>=2.0.7
45
+ Requires-Dist: networkx>=3.2.1
29
46
  Requires-Dist: tqdm==4.65.0
30
47
  Requires-Dist: OWSLib==0.32.1
31
48
  Requires-Dist: pydantic-settings>=2.7.1
@@ -33,15 +50,16 @@ Requires-Dist: hdx-python-api>=6.3.8
33
50
  Requires-Dist: bs4==0.0.2
34
51
  Requires-Dist: sqlalchemy-trino==0.5.0
35
52
  Requires-Dist: dask>=2024.12.1
53
+ Requires-Dist: h3>=4.2.0
36
54
  Dynamic: author
37
55
  Dynamic: author-email
38
56
  Dynamic: classifier
39
57
  Dynamic: description
40
58
  Dynamic: description-content-type
41
- Dynamic: home-page
42
59
  Dynamic: keywords
43
60
  Dynamic: license
44
61
  Dynamic: license-file
62
+ Dynamic: project-url
45
63
  Dynamic: requires-dist
46
64
  Dynamic: requires-python
47
65
  Dynamic: summary
@@ -53,6 +71,14 @@ Dynamic: summary
53
71
 
54
72
  # GigaSpatial
55
73
 
74
+ [![License: AGPL v3](https://img.shields.io/badge/License-AGPL%20v3-brightgreen.svg)](https://opensource.org/license/agpl-v3)
75
+ [![PyPI version](https://badge.fury.io/py/giga-spatial.svg)](https://badge.fury.io/py/giga-spatial)
76
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/giga-spatial.svg?color=dark-green)](https://pypi.org/project/giga-spatial/)
77
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
78
+ [![PyPI Downloads](https://static.pepy.tech/badge/giga-spatial)](https://pepy.tech/projects/giga-spatial)
79
+ [![GitHub commit activity](https://img.shields.io/github/commit-activity/y/unicef/giga-spatial.svg?color=dark-green)](https://github.com/unicef/giga-spatial/graphs/contributors)
80
+
81
+
56
82
  ## About Giga
57
83
 
58
84
  [Giga](https://giga.global/) is a UNICEF-ITU initiative to connect every school to the Internet and every young person to information, opportunity and choice.
@@ -5,6 +5,14 @@
5
5
 
6
6
  # GigaSpatial
7
7
 
8
+ [![License: AGPL v3](https://img.shields.io/badge/License-AGPL%20v3-brightgreen.svg)](https://opensource.org/license/agpl-v3)
9
+ [![PyPI version](https://badge.fury.io/py/giga-spatial.svg)](https://badge.fury.io/py/giga-spatial)
10
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/giga-spatial.svg?color=dark-green)](https://pypi.org/project/giga-spatial/)
11
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
12
+ [![PyPI Downloads](https://static.pepy.tech/badge/giga-spatial)](https://pepy.tech/projects/giga-spatial)
13
+ [![GitHub commit activity](https://img.shields.io/github/commit-activity/y/unicef/giga-spatial.svg?color=dark-green)](https://github.com/unicef/giga-spatial/graphs/contributors)
14
+
15
+
8
16
  ## About Giga
9
17
 
10
18
  [Giga](https://giga.global/) is a UNICEF-ITU initiative to connect every school to the Internet and every young person to information, opportunity and choice.
@@ -0,0 +1,146 @@
1
+ # Processing Raster Files
2
+
3
+ This example demonstrates how to process raster files using the `TifProcessor` class.
4
+
5
+ ## Prerequisites
6
+
7
+ Ensure you have installed the `gigaspatial` package and set up the necessary configuration. Follow the [Installation Guide](../../getting-started/installation.md) if you haven’t already.
8
+
9
+ ## Example Code
10
+
11
+ ```python
12
+ from gigaspatial.processing import TifProcessor
13
+ from gigaspatial.core.io import LocalDataStore
14
+ from rasterio.warp import Resampling # For reprojection methods
15
+
16
+ # NOTE: For these examples, replace "/path/to/your/file.tif" with actual paths to your GeoTIFF files.
17
+ # You might need to create dummy files or use existing ones for local testing.
18
+
19
+ # 1. Initialize with a single TIFF file
20
+ print("--- Single TIFF File Processing ---")
21
+ single_processor = TifProcessor(
22
+ "/path/to/single_band.tif",
23
+ mode="single" # Can be "rgb", "rgba", "multi"
24
+ )
25
+ df_single = single_processor.to_dataframe()
26
+ print("Single-band DataFrame head:")
27
+ print(df_single.head())
28
+ print("Raster Info for single_processor:")
29
+ print(single_processor.get_raster_info())
30
+
31
+
32
+ # 2. Initialize with multiple TIFF files for merging and reprojection
33
+ print("\n--- Multi-raster Merging and Reprojection ---")
34
+ # Replace with actual paths to your tif files. Ensure they are compatible for merging.
35
+ # Example: two adjacent tiles from a dataset.
36
+ tif_paths = [
37
+ "/path/to/raster1.tif",
38
+ "/path/to/raster2.tif"
39
+ ]
40
+ merged_reprojected_processor = TifProcessor(
41
+ dataset_path=tif_paths,
42
+ mode="single", # Or "multi", "rgb", "rgba" depending on your data
43
+ merge_method="mean", # Options: "first", "last", "min", "max", "mean"
44
+ target_crs="EPSG:4326", # Reproject all rasters to WGS84 during initialization
45
+ )
46
+ df_merged = merged_reprojected_processor.to_dataframe()
47
+ print("Merged and Reprojected DataFrame head:")
48
+ print(df_merged.head())
49
+ print("Raster Info for merged_reprojected_processor:")
50
+ print(merged_reprojected_processor.get_raster_info())
51
+
52
+ # 3. Explicit Reprojection after initialization
53
+ print("\n--- Explicit Reprojection ---")
54
+ # Reproject the current raster (e.g., the merged one) to a different CRS or resolution
55
+ # In a real scenario, you'd save this to a persistent location.
56
+ reprojected_output_path = "./temp_reprojected_raster.tif"
57
+ reprojected_path = merged_reprojected_processor.reproject_to(
58
+ target_crs="EPSG:3857", # Web Mercator
59
+ output_path=reprojected_output_path,
60
+ resampling_method=Resampling.bilinear # Different resampling method
61
+ )
62
+ print(f"Raster reprojected to: {reprojected_path}")
63
+
64
+ # 4. Convert raster to a graph (NetworkX example)
65
+ print("\n--- Raster to Graph Conversion ---")
66
+ # Assuming '/path/to/single_band.tif' is a suitable single-band raster
67
+ graph_processor = TifProcessor(
68
+ "/path/to/single_band.tif",
69
+ mode="single" # Graph conversion typically for single-band data
70
+ )
71
+ graph = graph_processor.to_graph(
72
+ connectivity=8, # 4-connectivity (von Neumann) or 8-connectivity (Moore)
73
+ include_coordinates=True, # Include 'x' and 'y' coordinates as node attributes
74
+ graph_type="networkx" # Or "sparse" for scipy.sparse.csr_matrix
75
+ )
76
+ print(f"Generated a NetworkX graph with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges.")
77
+ # Example: Access node attributes (first 5 nodes)
78
+ # for node_id, data in list(graph.nodes(data=True))[:5]:
79
+ # print(f"Node {node_id}: Value={data['value']:.2f}, X={data.get('x'):.2f}, Y={data.get('y'):.2f}")
80
+ ```
81
+
82
+ ## Explanation
83
+
84
+ The `TifProcessor` class provides robust functionalities for handling GeoTIFF files, from single-band to multi-band (RGB, RGBA) datasets, with advanced processing capabilities including:
85
+
86
+ - **Initialization**:
87
+ - Can be initialized with a single GeoTIFF file path.
88
+ - Supports a list of GeoTIFF file paths for **automatic merging** during initialization, configured via `merge_method` (`first`, `last`, `min`, `max`, `mean`).
89
+ - The `mode` parameter (`single`, `rgb`, `rgba`, `multi`) dictates how bands are interpreted and validated.
90
+ - `target_crs` and `reprojection_resolution` can be set during initialization to reproject rasters immediately to a consistent CRS and pixel size.
91
+ - **Data Extraction**:
92
+ - `to_dataframe()`: Converts raster data into a pandas DataFrame, with columns for longitude, latitude, and pixel values (or band-specific values for multi-band modes).
93
+ - `to_geodataframe()`: Extends `to_dataframe()` by adding a `geometry` column, returning a GeoDataFrame in which each pixel is represented by its bounding box, with the correct CRS.
94
+ - **Reprojection (`reproject_to`)**:
95
+ - Allows explicit reprojection of the current raster to a new Coordinate Reference System (CRS) and/or resolution, saving the output to a specified path or a temporary file.
96
+ - Supports different `resampling_method` options (e.g., `Resampling.nearest`, `Resampling.bilinear`).
97
+ - **Raster Information (`get_raster_info`)**:
98
+ - Provides a dictionary containing comprehensive metadata about the raster, such as band count, dimensions, CRS, bounds, transform, data types, nodata values, processing mode, and merge status.
99
+ - **Graph Conversion (`to_graph`)**:
100
+ - Converts raster data into a graph (NetworkX graph or SciPy sparse matrix) based on pixel adjacency.
101
+ - Supports `connectivity` of 4 (von Neumann neighborhood) or 8 (Moore neighborhood).
102
+ - Can include geographic coordinates and pixel values as node attributes.
103
+ - **Sampling**:
104
+ - `sample_by_coordinates()`: Extracts pixel values at specific geographic coordinates.
105
+ - `sample_by_polygons()`: Computes aggregate statistics (e.g., mean, sum, min, max, count) of pixel values within given polygon boundaries, supporting single or multiple statistics.
106
+ - `sample_by_polygons_batched()`: Provides a parallelized version of polygon sampling for performance-intensive tasks.
107
+
108
+ ---
109
+
110
+ # Multi-raster reprojection
111
+
112
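+ When multiple rasters are both merged and reprojected, the metadata of the result depends on the order of operations: **reproject then merge** versus **merge then reproject**. The differences in the reprojected metadata are expected: the two processes follow different steps, leading to variations in the final raster's dimensions, bounds, and resolution. The walkthrough below uses two rasters in `ESRI:54009` that are reprojected to `EPSG:4326`.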
113
+
114
+ ---
115
+
116
+ ### **Reproject then Merge**
117
+
118
+ When you specify `target_crs` at initialization, the code first **reprojects each individual raster** to the target CRS (`EPSG:4326`) and then **merges the reprojected outputs**.
119
+
120
+ - **Step 1: Reprojection**: Each input raster is reprojected from `ESRI:54009` to `EPSG:4326`. During this step, `rasterio`'s `calculate_default_transform` function computes a new transform and pixel dimensions (`width`, `height`) for each raster. The reprojected rasters are now in the same CRS with a consistent resolution (e.g., `0.00918...` degrees).
121
+ - **Step 2: Merging**: The reprojected rasters, which are now in the same CRS and have similar resolutions, are merged. The `rasterio.merge` function can combine these aligned rasters seamlessly. The final output's dimensions are calculated by finding the union of all reprojected rasters' bounds and applying the shared resolution, resulting in a single, larger raster.
122
+
123
+ This process ensures a uniform resolution and grid alignment across all parts of the final merged raster.
124
+
125
+ ---
126
+
127
+ ### **Merge then Reproject**
128
+
129
+ When `target_crs` is not specified at initialization, the code first **merges the two rasters** in their original `ESRI:54009` CRS and then **reprojects the single, merged output** to `EPSG:4326`.
130
+
131
+ - **Step 1: Merging**: The two rasters are merged in `ESRI:54009`. Since they are in the same CRS and have the same resolution (`1000.0` meters), `rasterio.merge` can simply combine them side-by-side. The original raster was `1000x1000`, so merging a second one next to it likely creates a `2000x1000` raster, as seen in the metadata. The resolution remains `1000.0` meters.
132
+ - **Step 2: Reprojection**: The single `2000x1000` raster is then reprojected to `EPSG:4326`. A new transform and pixel dimensions are calculated for this single, larger raster. Since `calculate_default_transform` is working on a different-shaped input, it will calculate a different output resolution and grid shape. The resulting resolution (`0.00973...`) and dimensions (`2076x832`) will be different because the reprojection is performed on a single, larger input rather than two smaller ones.
133
+
134
+ ---
135
+
136
+ ### **Why the Metadata is Different**
137
+
138
+ - **Resolution**: The `reproject-then-merge` approach maintains a consistent resolution that is calculated for a single tile and then applied to all. The `merge-then-reproject` approach calculates a single resolution for the entire, larger combined area. The process of resampling to a new grid (a core part of reprojection) is inherently sensitive to the input's size and shape.
139
+ - **Dimensions (`width`, `height`)**: The final pixel dimensions are a function of the total bounds and the final resolution. Since the resolution is different in the two methods, the width and height must also be different to cover the same geographic area.
140
+ - **Bounds**: The final bounds are nearly identical in latitude and longitude, which makes sense because both methods represent the same geographic area. Any slight differences are due to rounding and the nuances of resampling.
141
+
142
+ **Conclusion**: The differences are normal and reflect the non-commutative nature of these two geospatial operations. The **reproject then merge** approach is generally preferable as it ensures greater consistency and can be more accurate when dealing with rasters that have slightly different resolutions or alignments, as it creates a single, clean grid before combining the data.
143
+
144
+ ---
145
+
146
+ [Back to Examples](../index.md)
@@ -1,13 +1,29 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: giga-spatial
3
- Version: 0.6.8
3
+ Version: 0.7.0
4
4
  Summary: A package for spatial data download & processing
5
- Home-page: https://github.com/unicef/giga-spatial
6
5
  Author: Utku Can Ozturk
7
6
  Author-email: utkucanozturk@gmail.com
8
7
  License: AGPL-3.0-or-later
8
+ Project-URL: Homepage, https://github.com/unicef/giga-spatial
9
+ Project-URL: Documentation, https://unicef.github.io/giga-spatial/
10
+ Project-URL: Source, https://github.com/unicef/giga-spatial
11
+ Project-URL: Issue Tracker, https://github.com/unicef/giga-spatial/issues
12
+ Project-URL: Discussions, https://github.com/unicef/giga-spatial/discussions
13
+ Project-URL: Changelog, https://unicef.github.io/giga-spatial/changelog
9
14
  Keywords: gigaspatial,spatial,geospatial,gis,remote sensing,data processing,download,openstreetmap,osm,ghsl,grid,point of interest,POI,raster,vector,school connectivity,unicef,giga,mapping,analysis,python
15
+ Classifier: Development Status :: 5 - Production/Stable
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Education
18
+ Classifier: Intended Audience :: Healthcare Industry
19
+ Classifier: Intended Audience :: Science/Research
20
+ Classifier: Intended Audience :: Telecommunications Industry
21
+ Classifier: Programming Language :: Python
10
22
  Classifier: Programming Language :: Python :: 3
23
+ Classifier: Programming Language :: Python :: 3.10
24
+ Classifier: Programming Language :: Python :: 3.11
25
+ Classifier: Programming Language :: Python :: 3.12
26
+ Classifier: Topic :: Scientific/Engineering :: GIS
11
27
  Classifier: License :: OSI Approved :: GNU Affero General Public License v3
12
28
  Classifier: Operating System :: OS Independent
13
29
  Requires-Python: >=3.10
@@ -24,8 +40,9 @@ Requires-Dist: pycountry==24.6.1
24
40
  Requires-Dist: pydantic>=2.10.6
25
41
  Requires-Dist: rasterio==1.3.10
26
42
  Requires-Dist: Requests==2.32.3
27
- Requires-Dist: scipy==1.15.1
43
+ Requires-Dist: scipy>=1.15.1
28
44
  Requires-Dist: Shapely>=2.0.7
45
+ Requires-Dist: networkx>=3.2.1
29
46
  Requires-Dist: tqdm==4.65.0
30
47
  Requires-Dist: OWSLib==0.32.1
31
48
  Requires-Dist: pydantic-settings>=2.7.1
@@ -33,15 +50,16 @@ Requires-Dist: hdx-python-api>=6.3.8
33
50
  Requires-Dist: bs4==0.0.2
34
51
  Requires-Dist: sqlalchemy-trino==0.5.0
35
52
  Requires-Dist: dask>=2024.12.1
53
+ Requires-Dist: h3>=4.2.0
36
54
  Dynamic: author
37
55
  Dynamic: author-email
38
56
  Dynamic: classifier
39
57
  Dynamic: description
40
58
  Dynamic: description-content-type
41
- Dynamic: home-page
42
59
  Dynamic: keywords
43
60
  Dynamic: license
44
61
  Dynamic: license-file
62
+ Dynamic: project-url
45
63
  Dynamic: requires-dist
46
64
  Dynamic: requires-python
47
65
  Dynamic: summary
@@ -53,6 +71,14 @@ Dynamic: summary
53
71
 
54
72
  # GigaSpatial
55
73
 
74
+ [![License: AGPL v3](https://img.shields.io/badge/License-AGPL%20v3-brightgreen.svg)](https://opensource.org/license/agpl-v3)
75
+ [![PyPI version](https://badge.fury.io/py/giga-spatial.svg)](https://badge.fury.io/py/giga-spatial)
76
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/giga-spatial.svg?color=dark-green)](https://pypi.org/project/giga-spatial/)
77
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
78
+ [![PyPI Downloads](https://static.pepy.tech/badge/giga-spatial)](https://pepy.tech/projects/giga-spatial)
79
+ [![GitHub commit activity](https://img.shields.io/github/commit-activity/y/unicef/giga-spatial.svg?color=dark-green)](https://github.com/unicef/giga-spatial/graphs/contributors)
80
+
81
+
56
82
  ## About Giga
57
83
 
58
84
  [Giga](https://giga.global/) is a UNICEF-ITU initiative to connect every school to the Internet and every young person to information, opportunity and choice.
@@ -61,6 +61,7 @@ gigaspatial/generators/zonal/base.py
61
61
  gigaspatial/generators/zonal/geometry.py
62
62
  gigaspatial/generators/zonal/mercator.py
63
63
  gigaspatial/grid/__init__.py
64
+ gigaspatial/grid/h3.py
64
65
  gigaspatial/grid/mercator_tiles.py
65
66
  gigaspatial/handlers/__init__.py
66
67
  gigaspatial/handlers/base.py
@@ -9,12 +9,14 @@ pycountry==24.6.1
9
9
  pydantic>=2.10.6
10
10
  rasterio==1.3.10
11
11
  Requests==2.32.3
12
- scipy==1.15.1
12
+ scipy>=1.15.1
13
13
  Shapely>=2.0.7
14
+ networkx>=3.2.1
14
15
  tqdm==4.65.0
15
16
  OWSLib==0.32.1
16
17
  pydantic-settings>=2.7.1
17
18
  hdx-python-api>=6.3.8
18
19
  bs4==0.0.2
19
20
  sqlalchemy-trino==0.5.0
20
- dask>=2024.12.1
21
+ dask>=2024.12.1
22
+ h3>=4.2.0
@@ -0,0 +1 @@
1
+ __version__ = "0.7.0"
@@ -1,4 +1,5 @@
1
1
  from azure.storage.blob import BlobServiceClient
2
+ import time
2
3
  import io
3
4
  import contextlib
4
5
  import logging
@@ -151,20 +152,45 @@ class ADLSDataStore(DataStore):
151
152
  "\\", "/"
152
153
  )
153
154
 
154
- # Create a source blob client
155
- source_blob_client = self.container_client.get_blob_client(blob.name)
155
+ # Use copy_file method to copy each file
156
+ self.copy_file(blob.name, new_blob_path, overwrite=True)
156
157
 
157
- # Create a destination blob client
158
- destination_blob_client = self.container_client.get_blob_client(
159
- new_blob_path
158
+ print(f"Copied directory from {source_dir} to {destination_dir}")
159
+ except Exception as e:
160
+ print(f"Failed to copy directory {source_dir}: {e}")
161
+
162
+ def copy_file(
163
+ self, source_path: str, destination_path: str, overwrite: bool = False
164
+ ):
165
+ """
166
+ Copies a single file from source to destination within the same container.
167
+
168
+ :param source_path: The source file path in the blob storage
169
+ :param destination_path: The destination file path in the blob storage
170
+ :param overwrite: If True, overwrite the destination file if it already exists
171
+ """
172
+ try:
173
+ if not self.file_exists(source_path):
174
+ raise FileNotFoundError(f"Source file not found: {source_path}")
175
+
176
+ if self.file_exists(destination_path) and not overwrite:
177
+ raise FileExistsError(
178
+ f"Destination file already exists and overwrite is False: {destination_path}"
160
179
  )
161
180
 
162
- # Start the copy operation
163
- destination_blob_client.start_copy_from_url(source_blob_client.url)
181
+ # Create source and destination blob clients
182
+ source_blob_client = self.container_client.get_blob_client(source_path)
183
+ destination_blob_client = self.container_client.get_blob_client(
184
+ destination_path
185
+ )
164
186
 
165
- print(f"Copied directory from {source_dir} to {destination_dir}")
187
+ # Start the server-side copy operation
188
+ destination_blob_client.start_copy_from_url(source_blob_client.url)
189
+
190
+ print(f"Copied file from {source_path} to {destination_path}")
166
191
  except Exception as e:
167
- print(f"Failed to copy directory {source_dir}: {e}")
192
+ print(f"Failed to copy file {source_path}: {e}")
193
+ raise
168
194
 
169
195
  def exists(self, path: str) -> bool:
170
196
  blob_client = self.blob_service_client.get_blob_client(
@@ -285,8 +311,20 @@ class ADLSDataStore(DataStore):
285
311
  return False
286
312
 
287
313
  def rmdir(self, dir: str) -> None:
288
- blobs = self.list_files(dir)
289
- self.container_client.delete_blobs(*blobs)
314
+ # Normalize directory path to ensure it targets all children
315
+ dir_path = dir.rstrip("/") + "/"
316
+
317
+ # Azure Blob batch delete has a hard limit on number of sub-requests
318
+ # per batch (currently 256). Delete in chunks to avoid
319
+ # ExceedsMaxBatchRequestCount errors.
320
+ blobs = list(self.list_files(dir_path))
321
+ if not blobs:
322
+ return
323
+
324
+ BATCH_LIMIT = 256
325
+ for start_idx in range(0, len(blobs), BATCH_LIMIT):
326
+ batch = blobs[start_idx : start_idx + BATCH_LIMIT]
327
+ self.container_client.delete_blobs(*batch)
290
328
 
291
329
  def mkdir(self, path: str, exist_ok: bool = False) -> None:
292
330
  """
@@ -323,3 +361,58 @@ class ADLSDataStore(DataStore):
323
361
  )
324
362
  if blob_client.exists():
325
363
  blob_client.delete_blob()
364
+
365
+ def rename(
366
+ self,
367
+ source_path: str,
368
+ destination_path: str,
369
+ overwrite: bool = False,
370
+ delete_source: bool = True,
371
+ wait: bool = True,
372
+ timeout_seconds: int = 300,
373
+ poll_interval_seconds: int = 1,
374
+ ) -> None:
375
+ """
376
+ Rename (move) a single file by copying to the new path and deleting the source.
377
+
378
+ :param source_path: Existing blob path
379
+ :param destination_path: Target blob path
380
+ :param overwrite: Overwrite destination if it already exists
381
+ :param delete_source: Delete original after successful copy
382
+ :param wait: Wait for the copy operation to complete
383
+ :param timeout_seconds: Max time to wait for copy to succeed
384
+ :param poll_interval_seconds: Polling interval while waiting
385
+ """
386
+
387
+ if not self.file_exists(source_path):
388
+ raise FileNotFoundError(f"Source file not found: {source_path}")
389
+
390
+ if self.file_exists(destination_path) and not overwrite:
391
+ raise FileExistsError(
392
+ f"Destination already exists and overwrite is False: {destination_path}"
393
+ )
394
+
395
+ # Use copy_file method to copy the file
396
+ self.copy_file(source_path, destination_path, overwrite=overwrite)
397
+
398
+ if wait:
399
+ # Wait for copy to complete if requested
400
+ dest_client = self.container_client.get_blob_client(destination_path)
401
+ deadline = time.time() + timeout_seconds
402
+ while True:
403
+ props = dest_client.get_blob_properties()
404
+ status = getattr(props.copy, "status", None)
405
+ if status == "success":
406
+ break
407
+ if status in {"aborted", "failed"}:
408
+ raise IOError(
409
+ f"Copy failed with status {status} from {source_path} to {destination_path}"
410
+ )
411
+ if time.time() > deadline:
412
+ raise TimeoutError(
413
+ f"Timed out waiting for copy to complete for {destination_path}"
414
+ )
415
+ time.sleep(poll_interval_seconds)
416
+
417
+ if delete_source:
418
+ self.remove(source_path)
@@ -1,5 +1,6 @@
1
1
  from pathlib import Path
2
2
  import os
3
+ import shutil
3
4
  from typing import Any, List, Generator, Tuple, Union, IO
4
5
 
5
6
  from .data_store import DataStore
@@ -79,6 +80,13 @@ class LocalDataStore(DataStore):
79
80
  if full_path.is_file():
80
81
  os.remove(full_path)
81
82
 
83
+ def copy_file(self, src: str, dst: str) -> None:
84
+ """Copy a file from src to dst."""
85
+ src_path = self._resolve_path(src)
86
+ dst_path = self._resolve_path(dst)
87
+ self.mkdir(str(dst_path.parent), exist_ok=True)
88
+ shutil.copy2(src_path, dst_path)
89
+
82
90
  def rmdir(self, directory: str) -> None:
83
91
  full_path = self._resolve_path(directory)
84
92
  if full_path.is_dir():
@@ -145,11 +145,18 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
145
145
  gpd.GeoDataFrame: A GeoDataFrame with 'zone_id' and 'geometry' columns.
146
146
  The zone_id column is renamed from the original zone_id_column if different.
147
147
  """
148
- # If we already have a GeoDataFrame, just rename the ID column if needed
149
- result = self._zone_gdf.copy()
150
- if self.zone_id_column != "zone_id":
151
- result = result.rename(columns={self.zone_id_column: "zone_id"})
152
- return result
148
+ # Since _zone_gdf is already created with 'zone_id' column in the constructor,
149
+ # we just need to return a copy of it
150
+ return self._zone_gdf.copy()
151
+
152
+ @property
153
+ def zone_gdf(self) -> gpd.GeoDataFrame:
154
+ """Override the base class zone_gdf property to ensure correct column names.
155
+
156
+ Returns:
157
+ gpd.GeoDataFrame: A GeoDataFrame with 'zone_id' and 'geometry' columns.
158
+ """
159
+ return self._zone_gdf.copy()
153
160
 
154
161
  def map_built_s(
155
162
  self,