earthforge-raster 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ *.egg
6
+ dist/
7
+ build/
8
+ *.whl
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+
14
+ # IDE
15
+ .vscode/
16
+ .idea/
17
+ *.swp
18
+ *.swo
19
+
20
+ # Testing
21
+ .pytest_cache/
22
+ .coverage
23
+ htmlcov/
24
+ .mypy_cache/
25
+
26
+ # OS
27
+ .DS_Store
28
+ Thumbs.db
29
+
30
+ # Claude
31
+ .claude/
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: earthforge-raster
3
+ Version: 0.1.0
4
+ Summary: EarthForge raster operations: COG info, validation, conversion, preview
5
+ License-Expression: GPL-3.0-or-later
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: earthforge-core>=0.1.0
8
+ Requires-Dist: numpy>=1.24
9
+ Requires-Dist: rasterio>=1.3
10
+ Description-Content-Type: text/markdown
11
+
12
+ # earthforge-raster
13
+
14
+ COG and GeoTIFF operations for EarthForge: info, validation, conversion, preview.
15
+
16
+ See the [main README](../../README.md) for project documentation.
@@ -0,0 +1,5 @@
1
+ # earthforge-raster
2
+
3
+ COG and GeoTIFF operations for EarthForge: info, validation, conversion, preview.
4
+
5
+ See the [main README](../../README.md) for project documentation.
@@ -0,0 +1,19 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.18"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "earthforge-raster"
7
+ version = "0.1.0"
8
+ description = "EarthForge raster operations: COG info, validation, conversion, preview"
9
+ readme = "README.md"
10
+ license = "GPL-3.0-or-later"
11
+ requires-python = ">=3.11"
12
+ dependencies = [
13
+ "earthforge-core>=0.1.0",
14
+ "rasterio>=1.3",
15
+ "numpy>=1.24",
16
+ ]
17
+
18
+ [tool.hatch.build.targets.wheel]
19
+ packages = ["src/earthforge"]
@@ -0,0 +1,7 @@
1
+ """EarthForge raster operations — COG info, validation, conversion, and preview.
2
+
3
+ This package provides async-first APIs for working with Cloud Optimized GeoTIFFs
4
+ and other raster formats. All I/O goes through ``earthforge.core`` wrappers.
5
+ """
6
+
7
+ __version__ = "0.1.0"
@@ -0,0 +1,261 @@
1
+ """GeoTIFF to Cloud-Optimized GeoTIFF (COG) conversion.
2
+
3
+ Converts plain GeoTIFF files into COG format by applying tiling, compression,
4
+ and overview generation. Uses GDAL's COG driver (through the ``osgeo.gdal``
5
+ Python bindings) for spec-compliant output with proper IFD ordering.
6
+
7
+ Usage::
8
+
9
+ from earthforge.raster.convert import convert_to_cog
10
+
11
+ result = await convert_to_cog("input.tif", output="output.tif")
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ from functools import partial
18
+ from pathlib import Path
19
+
20
+ from pydantic import BaseModel, Field
21
+
22
+ from earthforge.raster.errors import RasterError
23
+
24
+
25
class CogConvertResult(BaseModel):
    """Summary record describing one finished COG conversion.

    Attributes:
        source: Path of the input raster.
        output: Path of the COG that was written.
        width: Raster width, in pixels.
        height: Raster height, in pixels.
        band_count: How many bands the raster has.
        dtype: Name of the raster data type.
        crs: CRS identifier string; ``None`` when no CRS is known.
        compression: Compression codec that was requested.
        blocksize: Tile size that was requested.
        overview_levels: Decimation factors of the overviews.
        overview_resampling: Resampling method used to build overviews.
        file_size_bytes: Size of the output in bytes; ``None`` if unknown.
    """

    # Required fields are marked with ``...``; optional ones carry ``None``.
    source: str = Field(..., title="Source")
    output: str = Field(..., title="Output")
    width: int = Field(..., title="Width")
    height: int = Field(..., title="Height")
    band_count: int = Field(..., title="Bands")
    dtype: str = Field(..., title="Data Type")
    crs: str | None = Field(None, title="CRS")
    compression: str = Field(..., title="Compression")
    blocksize: int = Field(..., title="Block Size")
    overview_levels: list[int] = Field(..., title="Overview Levels")
    overview_resampling: str = Field(..., title="Overview Resampling")
    file_size_bytes: int | None = Field(None, title="Size (bytes)")
55
+
56
+
57
+ def _compute_overview_levels(width: int, height: int, blocksize: int = 512) -> list[int]:
58
+ """Compute appropriate overview levels for a raster.
59
+
60
+ Generates powers-of-2 overview levels until the smallest overview
61
+ dimension is roughly one tile or smaller.
62
+
63
+ Parameters:
64
+ width: Raster width in pixels.
65
+ height: Raster height in pixels.
66
+ blocksize: Tile size (default: 512).
67
+
68
+ Returns:
69
+ List of overview decimation factors (e.g. ``[2, 4, 8, 16]``).
70
+ """
71
+ levels: list[int] = []
72
+ factor = 2
73
+ min_dim = min(width, height)
74
+ while min_dim // factor >= blocksize // 2:
75
+ levels.append(factor)
76
+ factor *= 2
77
+ # Always include at least one level if the raster is large enough
78
+ if not levels and min_dim > blocksize:
79
+ levels.append(2)
80
+ return levels
81
+
82
+
83
def _cog_creation_options(compression: str, blocksize: int, resampling: str) -> list[str]:
    """Build the GDAL COG-driver creation options for the requested settings."""
    # Names are upper-cased and handed to GDAL as-is, so an unsupported codec
    # or resampling method is rejected by GDAL instead of being silently
    # replaced by a different one.
    gdal_compress = compression.upper()
    gdal_resamp = resampling.upper()

    options = [
        f"COMPRESS={gdal_compress}",
        f"BLOCKSIZE={blocksize}",
        f"OVERVIEW_RESAMPLING={gdal_resamp}",
    ]
    # PREDICTOR=2 (horizontal differencing) is added only for the codecs GDAL
    # documents predictor support for: LZW, DEFLATE and ZSTD.
    if gdal_compress in {"DEFLATE", "LZW", "ZSTD"}:
        options.append("PREDICTOR=2")
    return options


def _convert_to_cog_sync(
    source: str,
    *,
    output: str | None = None,
    compression: str = "deflate",
    blocksize: int = 512,
    resampling: str = "average",
    overview_levels: list[int] | None = None,
) -> CogConvertResult:
    """Convert a GeoTIFF to COG synchronously.

    Parameters:
        source: Path to the input GeoTIFF.
        output: Output COG path. If ``None``, appends ``_cog`` to the stem.
        compression: Compression codec (``"deflate"``, ``"lzw"``, ``"zstd"``).
        blocksize: Tile size in pixels (default: 512).
        resampling: Resampling method for overviews (default: ``"average"``).
        overview_levels: Accepted for interface compatibility. The value is
            not forwarded to GDAL; the levels reported in the result are
            those read back from the written file.

    Returns:
        Structured conversion result.

    Raises:
        RasterError: If a required library is missing, the source cannot be
            opened, the conversion fails, or the output cannot be re-read.
    """
    try:
        import rasterio
    except ImportError as exc:
        raise RasterError(
            "rasterio is required for COG conversion: pip install earthforge[raster]"
        ) from exc

    # Read source metadata
    try:
        src_ds = rasterio.open(source)
    except Exception as exc:
        raise RasterError(f"Failed to open '{source}': {exc}") from exc

    with src_ds:
        width = src_ds.width
        height = src_ds.height
        band_count = src_ds.count
        dtype = str(src_ds.dtypes[0])
        crs_str = str(src_ds.crs) if src_ds.crs else None

    # Determine output path
    if output is None:
        src_path = Path(source)
        output = str(src_path.with_stem(f"{src_path.stem}_cog"))

    try:
        from osgeo import gdal
    except ImportError as exc:
        raise RasterError(
            "GDAL is required for COG conversion: install GDAL Python bindings"
        ) from exc

    # Use GDAL's COG driver for spec-compliant output (handles tiling, IFD
    # ordering, and overview generation in a single pass)
    try:
        gdal.UseExceptions()
        translate_options = gdal.TranslateOptions(
            format="COG",
            creationOptions=_cog_creation_options(compression, blocksize, resampling),
        )
        result_ds = gdal.Translate(output, source, options=translate_options)
    except Exception as exc:
        raise RasterError(f"COG conversion failed: {exc}") from exc

    if result_ds is None:
        raise RasterError("GDAL Translate returned None")
    result_ds = None  # Drop the reference so GDAL closes and flushes the file

    # Read actual overview levels from the output
    try:
        with rasterio.open(output) as out_ds:
            actual_levels = out_ds.overviews(1) if out_ds.count > 0 else []
    except Exception as exc:
        raise RasterError(f"Failed to read converted output '{output}': {exc}") from exc

    # File size is best-effort: a stat failure leaves it as ``None`` rather
    # than failing an otherwise successful conversion.
    file_size: int | None = None
    try:
        file_size = Path(output).stat().st_size
    except OSError:
        pass

    return CogConvertResult(
        source=source,
        output=output,
        width=width,
        height=height,
        band_count=band_count,
        dtype=dtype,
        crs=crs_str,
        compression=compression,
        blocksize=blocksize,
        overview_levels=actual_levels,
        overview_resampling=resampling,
        file_size_bytes=file_size,
    )
219
+
220
+
221
async def convert_to_cog(
    source: str,
    *,
    output: str | None = None,
    compression: str = "deflate",
    blocksize: int = 512,
    resampling: str = "average",
    overview_levels: list[int] | None = None,
) -> CogConvertResult:
    """Convert a GeoTIFF to Cloud-Optimized GeoTIFF (COG).

    Runs the blocking conversion in the event loop's default executor so the
    loop itself is not blocked.

    Parameters:
        source: Path to the input GeoTIFF.
        output: Output COG path. If ``None``, appends ``_cog`` to the stem.
        compression: Compression codec (default: ``"deflate"``).
        blocksize: Tile size in pixels (default: 512).
        resampling: Resampling for overviews (default: ``"average"``).
        overview_levels: Passed through to the synchronous implementation,
            which accepts it but reports the levels read back from the
            written file.

    Returns:
        Structured conversion result.

    Raises:
        RasterError: If the conversion fails.
    """
    job = partial(
        _convert_to_cog_sync,
        source,
        output=output,
        compression=compression,
        blocksize=blocksize,
        resampling=resampling,
        overview_levels=overview_levels,
    )
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, job)
@@ -0,0 +1,29 @@
1
+ """Raster-specific error types."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from earthforge.core.errors import EarthForgeError
6
+
7
+
8
class RasterError(EarthForgeError):
    """Root of the raster exception hierarchy.

    Parameters:
        message: Human-readable description of what went wrong.
        exit_code: Exit code forwarded to the base error; ``10`` unless
            the caller overrides it.
    """

    def __init__(self, message: str, *, exit_code: int = 10) -> None:
        # Nothing raster-specific is stored; only the default code differs.
        super().__init__(message, exit_code=exit_code)
18
+
19
+
20
class CogValidationError(RasterError):
    """Error for a file that does not pass COG compliance checks.

    Parameters:
        message: Human-readable description of the validation failure.
        exit_code: Exit code forwarded to :class:`RasterError`; ``11``
            unless the caller overrides it.
    """

    def __init__(self, message: str, *, exit_code: int = 11) -> None:
        # Only the default exit code differs from the parent class.
        super().__init__(message, exit_code=exit_code)
@@ -0,0 +1,214 @@
1
+ """Raster file inspection — COG and GeoTIFF metadata extraction.
2
+
3
+ Reads raster metadata (dimensions, CRS, bands, data types, tiling, overviews)
4
+ without loading pixel data. For remote files, rasterio uses GDAL's virtual
5
+ filesystem (vsicurl) which issues HTTP range requests automatically.
6
+
7
+ Usage::
8
+
9
+ from earthforge.raster.info import inspect_raster, inspect_raster_sync
10
+
11
+ info = await inspect_raster("/path/to/file.tif")
12
+ print(info.width, info.height, info.crs)
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import logging
19
+ from functools import partial
20
+ from typing import Any
21
+
22
+ from pydantic import BaseModel, Field
23
+
24
+ from earthforge.raster.errors import RasterError
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class BandInfo(BaseModel):
    """Description of one band of a raster.

    Attributes:
        index: Band number, counted from 1.
        dtype: Data type name (e.g. ``"uint8"``, ``"float32"``).
        nodata: The band's NoData value; ``None`` when none is set.
        description: Band description; empty string when absent.
    """

    index: int = Field(..., title="Band")
    dtype: str = Field(..., title="Data Type")
    nodata: float | int | None = Field(None, title="NoData")
    description: str = Field("", title="Description")
43
+
44
+
45
class RasterInfo(BaseModel):
    """Metadata record for one inspected raster file.

    Attributes:
        source: File path or URL that was inspected.
        driver: GDAL driver name (e.g. ``"GTiff"``).
        width: Raster width in pixels.
        height: Raster height in pixels.
        crs: Coordinate reference system string (e.g. ``"EPSG:4326"``).
        bounds: Bounding box ordered ``[west, south, east, north]``.
        transform: Affine transform as six coefficients.
        band_count: Number of bands.
        bands: One :class:`BandInfo` entry per band.
        tile_width: Tile width in pixels; ``None`` for untiled (strip) files.
        tile_height: Tile height in pixels; ``None`` for untiled files.
        is_tiled: ``True`` when the raster uses a tiled layout.
        overview_count: How many overview levels exist.
        overview_levels: Decimation factor of each overview.
        compression: Compression method (e.g. ``"deflate"``), or ``None``.
        interleave: Pixel interleaving (``"band"``, ``"pixel"``), or ``None``.
    """

    # Identity and geometry
    source: str = Field(..., title="Source")
    driver: str = Field(..., title="Driver")
    width: int = Field(..., title="Width (px)")
    height: int = Field(..., title="Height (px)")
    crs: str | None = Field(None, title="CRS")
    bounds: list[float] = Field(default_factory=list, title="Bounds")
    transform: list[float] = Field(default_factory=list, title="Transform")

    # Bands
    band_count: int = Field(..., title="Bands")
    bands: list[BandInfo] = Field(default_factory=list, title="Band Info")

    # Storage layout
    tile_width: int | None = Field(None, title="Tile Width")
    tile_height: int | None = Field(None, title="Tile Height")
    is_tiled: bool = Field(False, title="Tiled")
    overview_count: int = Field(0, title="Overviews")
    overview_levels: list[int] = Field(default_factory=list, title="Overview Levels")
    compression: str | None = Field(None, title="Compression")
    interleave: str | None = Field(None, title="Interleave")
83
+
84
+
85
def _read_raster_info(source: str) -> RasterInfo:
    """Synchronous implementation that reads raster metadata via rasterio.

    Only dataset metadata is accessed; no pixel data is read by this function.

    Parameters:
        source: Local file path or URL (passed unchanged to ``rasterio.open``).

    Returns:
        Structured raster metadata.

    Raises:
        RasterError: If rasterio is not installed, or the file cannot be
            opened or read.
    """
    try:
        import rasterio
    except ImportError as exc:
        raise RasterError(
            "rasterio is required for raster operations. "
            "Install it with: pip install earthforge[raster]"
        ) from exc

    try:
        with rasterio.open(source) as ds:
            # Basic dimensions
            width = ds.width
            height = ds.height
            driver = ds.driver
            band_count = ds.count

            # CRS
            crs_str: str | None = str(ds.crs) if ds.crs is not None else None

            # Bounds as [west, south, east, north]
            b = ds.bounds
            bounds = [b.left, b.bottom, b.right, b.top]

            # Affine transform
            t = ds.transform
            transform = [t.a, t.b, t.c, t.d, t.e, t.f]

            # Per-band info. NoData is taken from ``nodatavals`` (one entry per
            # band); ``ds.nodata`` is a single dataset-level value and would
            # report the same NoData for every band.
            per_band = zip(ds.dtypes, ds.nodatavals, ds.descriptions)
            bands: list[BandInfo] = [
                BandInfo(
                    index=index,
                    dtype=str(band_dtype),
                    nodata=band_nodata,
                    description=band_desc or "",
                )
                for index, (band_dtype, band_nodata, band_desc) in enumerate(per_band, start=1)
            ]

            # Tiling info from the dataset profile; block sizes are reported
            # only when the profile marks the file as tiled.
            profile: dict[str, Any] = ds.profile
            blockxsize = profile.get("blockxsize", 0)
            blockysize = profile.get("blockysize", 0)
            tiled = profile.get("tiled", False)
            tile_w: int | None = blockxsize if tiled else None
            tile_h: int | None = blockysize if tiled else None

            # Overviews (from band 1)
            overviews = ds.overviews(1) if band_count > 0 else []

            # Compression and interleave, normalised to lower case
            compression = profile.get("compress")
            if compression:
                compression = str(compression).lower()
            interleave = profile.get("interleave")
            if interleave:
                interleave = str(interleave).lower()

    except RasterError:
        raise
    except Exception as exc:
        raise RasterError(f"Failed to read raster metadata from {source!r}: {exc}") from exc

    return RasterInfo(
        source=source,
        driver=driver,
        width=width,
        height=height,
        crs=crs_str,
        bounds=bounds,
        transform=transform,
        band_count=band_count,
        bands=bands,
        tile_width=tile_w,
        tile_height=tile_h,
        is_tiled=tiled,
        overview_count=len(overviews),
        overview_levels=overviews,
        compression=compression,
        interleave=interleave,
    )
182
+
183
+
184
async def inspect_raster(source: str) -> RasterInfo:
    """Return structured metadata for a raster without blocking the event loop.

    The blocking read is handed to the running loop's default executor.

    Parameters:
        source: Local file path or URL.

    Returns:
        Structured raster metadata.

    Raises:
        RasterError: If the file cannot be opened or read.
    """
    pending = asyncio.get_running_loop().run_in_executor(None, _read_raster_info, source)
    return await pending
200
+
201
+
202
def inspect_raster_sync(source: str) -> RasterInfo:
    """Blocking counterpart of :func:`inspect_raster`.

    Calls the synchronous reader directly, with no event loop involved.

    Parameters:
        source: Local file path or URL.

    Returns:
        Structured raster metadata.

    Raises:
        RasterError: If the file cannot be opened or read.
    """
    info = _read_raster_info(source)
    return info