PyPI - nc2cog - Versions diffs - 0.1.3__py3-none-any.whl - Mend

nc2cog 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

nc2cog/__init__.py +3 -0
nc2cog/__version__.py +8 -0
nc2cog/analyzer.py +185 -0
nc2cog/cli.py +266 -0
nc2cog/config.py +158 -0
nc2cog/discovery.py +109 -0
nc2cog/errors.py +25 -0
nc2cog/logger.py +48 -0
nc2cog/metadata.py +254 -0
nc2cog/processor.py +534 -0
nc2cog-0.1.3.dist-info/METADATA +360 -0
nc2cog-0.1.3.dist-info/RECORD +16 -0
nc2cog-0.1.3.dist-info/WHEEL +5 -0
nc2cog-0.1.3.dist-info/entry_points.txt +2 -0
nc2cog-0.1.3.dist-info/licenses/LICENSE +21 -0
nc2cog-0.1.3.dist-info/top_level.txt +1 -0

nc2cog/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""netCDF to COG TIFF Converter Package."""
+from .__version__ import __version__

nc2cog/__version__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Version information for netCDF to COG TIFF converter."""
+from importlib.metadata import version, PackageNotFoundError
+# Look the version up in the installed distribution's metadata so it is not
+# duplicated in source; when no "nc2cog" distribution is found the lookup
+# raises PackageNotFoundError and the placeholder "unknown" is used instead.
+try:
+    __version__ = version("nc2cog")
+except PackageNotFoundError:
+    __version__ = "unknown"

nc2cog/analyzer.py ADDED Viewed

@@ -0,0 +1,185 @@
+"""Analyzer for netCDF file structure and subdatasets."""
+from pathlib import Path
+from typing import List, Dict, Optional
+# Lower-case names treated as coordinate/georeferencing variables; anything
+# whose lower-cased name appears here is not reported as a data variable.
+_COORD_NAMES = frozenset((
+    'lat', 'latitude', 'nav_lat',
+    'lon', 'longitude', 'nav_lon',
+    'x', 'y',
+    'time', 'crs', 'spatial_ref',
+))
+# netCDF4 is an optional dependency: record whether the import succeeded so
+# callers can check the flag before touching the module.
+try:
+    import netCDF4
+except ImportError:
+    NETCDF4_AVAILABLE = False
+else:
+    NETCDF4_AVAILABLE = True
+class NCAnalyzer:
+    """
+    Analyzes netCDF files for variable and dimension structure.
+    GDAL is imported lazily inside the methods that use it. netCDF4 is
+    optional; methods consult the module-level NETCDF4_AVAILABLE flag before
+    using it.
+    """
+    def __init__(self, input_file: Path):
+        """
+        Initialize analyzer.
+        Args:
+            input_file: Path to netCDF file
+        """
+        self.input_file = Path(input_file)
+    def get_subdatasets(self) -> List[str]:
+        """
+        Get GDAL subdataset paths for this netCDF file.
+        Returns:
+            List of subdataset paths (the values of the ``*_NAME`` entries in
+            GDAL's SUBDATASETS metadata domain). The list is empty when GDAL
+            cannot open the file or reports no subdatasets.
+        """
+        from osgeo import gdal
+        ds = gdal.Open(str(self.input_file))
+        if ds is None:
+            return []
+        subdatasets = ds.GetMetadata('SUBDATASETS')
+        # Each subdataset contributes a *_NAME and a *_DESC key; only the
+        # name is an openable path.
+        return [value for key, value in subdatasets.items() if key.endswith('_NAME')]
+    def get_data_variables(self) -> List[str]:
+        """
+        Get list of data variable names, excluding coordinate variables.
+        A variable is reported when its lower-cased name is not in
+        _COORD_NAMES and it has at least one dimension, so scalar
+        variables are skipped.
+        Returns:
+            Sorted list of data variable names
+        Raises:
+            ImportError: If netCDF4 is not installed.
+        """
+        if not NETCDF4_AVAILABLE:
+            raise ImportError(
+                "netCDF4 is required for multi-dimensional NC files. "
+                "Install it with: pip install netCDF4"
+            )
+        nc = netCDF4.Dataset(str(self.input_file), 'r')
+        try:
+            return sorted(
+                name
+                for name, var in nc.variables.items()
+                if name.lower() not in _COORD_NAMES and var.ndim >= 1
+            )
+        finally:
+            nc.close()
+    def analyze_subdataset(self, subdataset_path: str) -> Dict:
+        """
+        Analyze a GDAL subdataset to extract raster and dimension info.
+        Args:
+            subdataset_path: GDAL subdataset path
+        Returns:
+            Dict that always contains name, width, height, bands, dtype and
+            gdal_dtype (the last two taken from band 1). When netCDF4 is
+            available and the variable is found in the file, dims and shape
+            are added; time_count, time_units and time_values are added for
+            a dimension named 'time' or 't' (case-insensitive) that has a
+            variable of the same name.
+        Raises:
+            ValueError: If GDAL cannot open the subdataset.
+        """
+        from osgeo import gdal
+        ds = gdal.Open(subdataset_path)
+        if ds is None:
+            raise ValueError(f"Cannot open subdataset: {subdataset_path}")
+        # Extract variable name from subdataset path
+        # Format: NETCDF:"file.nc":VARNAME
+        name = subdataset_path.rsplit(':', 1)[-1]
+        first_band = ds.GetRasterBand(1)
+        info = {
+            'name': name,
+            'width': ds.RasterXSize,
+            'height': ds.RasterYSize,
+            'bands': ds.RasterCount,
+            'dtype': gdal.GetDataTypeName(first_band.DataType),
+            'gdal_dtype': first_band.DataType,
+        }
+        if NETCDF4_AVAILABLE:
+            # The netCDF4 details are best-effort: a failure here keeps
+            # whatever was collected so far instead of failing the analysis.
+            try:
+                nc = netCDF4.Dataset(str(self.input_file), 'r')
+                try:
+                    if name in nc.variables:
+                        var = nc.variables[name]
+                        info['dims'] = list(var.dimensions)
+                        info['shape'] = list(var.shape)
+                        for dim_name in var.dimensions:
+                            if dim_name.lower() not in ('time', 't'):
+                                continue
+                            if dim_name not in nc.variables:
+                                continue
+                            time_var = nc.variables[dim_name]
+                            info['time_count'] = len(time_var)
+                            info['time_units'] = getattr(time_var, 'units', None)
+                            if hasattr(time_var, '__getitem__'):
+                                info['time_values'] = time_var[:]
+                finally:
+                    # Close the handle even when reading the variable fails.
+                    nc.close()
+            except Exception:
+                pass
+        return info
+    def get_time_descriptions(self, variable_name: str) -> List[str]:
+        """
+        Get human-readable time descriptions for a variable.
+        Args:
+            variable_name: Name of the variable
+        Returns:
+            List of time step descriptions:
+            "time=<i>, <ISO datetime>" when the time variable has units
+            containing " since " that netCDF4.num2date can convert;
+            "time=<i>" when the units are missing or cannot be converted;
+            "step_<i>" (one per entry of the first dimension) when the
+            variable has no 'time'/'t' dimension with a matching variable.
+            An empty list is returned when netCDF4 is unavailable, the
+            variable does not exist, or the file cannot be read.
+        """
+        if not NETCDF4_AVAILABLE:
+            return []
+        try:
+            nc = netCDF4.Dataset(str(self.input_file), 'r')
+            try:
+                if variable_name not in nc.variables:
+                    return []
+                var = nc.variables[variable_name]
+                time_dim_name = next(
+                    (d for d in var.dimensions if d.lower() in ('time', 't')),
+                    None,
+                )
+                if time_dim_name is None or time_dim_name not in nc.variables:
+                    return [f"step_{i}" for i in range(var.shape[0])]
+                time_var = nc.variables[time_dim_name]
+                time_units = getattr(time_var, 'units', None)
+                times = time_var[:]
+                index_only = [f"time={i}" for i in range(len(times))]
+                if not time_units:
+                    return index_only
+                try:
+                    # CF time units look like "minutes since 2025-11-13T06:30:00";
+                    # anything without " since " is not convertible.
+                    if ' since ' not in time_units:
+                        return index_only
+                    return [
+                        f"time={i}, {netCDF4.num2date(t, time_units).isoformat()}"
+                        for i, t in enumerate(times)
+                    ]
+                except Exception:
+                    # Unparseable units or values: fall back to plain indices.
+                    return index_only
+            finally:
+                nc.close()
+        except Exception:
+            return []

nc2cog/cli.py ADDED Viewed

@@ -0,0 +1,266 @@
+"""Command-line interface for netCDF to COG TIFF converter."""
+import click
+import sys
+from pathlib import Path
+import time
+from typing import Optional
+from .config import ConfigManager
+from .discovery import FileDiscovery
+from .processor import ProcessingEngine
+from .logger import setup_logger
+from .errors import NC2COGError
+from .analyzer import NCAnalyzer
+from .__version__ import __version__
+# Entry point of the nc2cog command. Command-line options are layered over the
+# ConfigManager configuration, then the input is handled either as a single
+# netCDF file exposing GDAL subdatasets (one output per variable) or as a set
+# of discovered files converted one by one.
+@click.command()
+@click.version_option(version=__version__, prog_name='nc2cog')
+# -V: eager flag that is not passed to main(); when set, its callback echoes
+# the version (click.echo returns None, so the `or` branch runs) and exits 0.
+@click.option('-V', is_flag=True, callback=lambda ctx, param, value: click.echo(f"nc2cog {__version__}") or ctx.exit(0) if value else None, expose_value=False, is_eager=True, help='Show version and exit')
+@click.argument('input_path', type=click.Path(exists=True, dir_okay=True, file_okay=True))
+@click.argument('output_path', type=click.Path(dir_okay=True, file_okay=True))
+@click.option('--config', '-c', type=click.Path(exists=True), help='Path to configuration file')
+@click.option('--compression', type=click.Choice(['deflate', 'lzw', 'jpeg']), default='deflate', help='Compression type')
+@click.option('--zlevel', type=click.IntRange(1, 9), default=6, help='Compression level for deflate (1-9, default: 6)')
+@click.option('--block-size', type=int, default=256, help='Block size for compression (default: 256)')
+@click.option('--resampling', type=click.Choice(['nearest', 'bilinear', 'cubic', 'average', 'mode', 'gauss', 'rms']), default='nearest', help='Resampling method for overviews (default: nearest)')
+@click.option('--tile-size', type=int, default=512, help='Tile size for COG (default: 512)')
+@click.option('--overview-levels', default='2,4,8,16', help='Overview levels for pyramid structure, comma-separated (default: 2,4,8,16)')
+@click.option('--overwrite', is_flag=True, help='Overwrite existing output files')
+@click.option('--dry-run', is_flag=True, help='Show what would be processed without doing it')
+@click.option('--verbose', '-v', is_flag=True, help='Enable verbose logging')
+@click.option('--resume', is_flag=True, help='Resume from last processed file')
+@click.option('--threads', type=int, default=1, help='Number of parallel processing threads')
+@click.option('--src-proj', type=str, help='Source projection in EPSG format (e.g., EPSG:4326)')
+@click.option('--dst-proj', type=str, help='Target projection in EPSG format (e.g., EPSG:3857)')
+@click.option('--variables', type=str, default=None, help='Variables to convert (comma-separated, e.g., PRE,REF)')
+@click.option('--metadata-source', type=str, default=None,
+              help='Data source description for metadata (e.g., satellite, sensor)')
+def main(
+    input_path: str,
+    output_path: str,
+    config: Optional[str],
+    compression: str,
+    zlevel: int,
+    block_size: int,
+    resampling: str,
+    tile_size: int,
+    overview_levels: str,
+    overwrite: bool,
+    dry_run: bool,
+    verbose: bool,
+    resume: bool,
+    threads: int,
+    src_proj: Optional[str],
+    dst_proj: Optional[str],
+    variables: Optional[str],
+    metadata_source: Optional[str],
+) -> None:
+    """
+    Convert netCDF files to Cloud-Optimized GeoTIFF format.
+    INPUT_PATH: Source directory or file path
+    OUTPUT_PATH: Destination directory
+    """
+    # NOTE(review): `threads` is accepted but not referenced anywhere in this
+    # function; files are converted sequentially in the loop below.
+    # Setup logging
+    logger = setup_logger(verbose=verbose)
+    # Initialize components
+    try:
+        # Load configuration
+        config_path = Path(config) if config else None
+        config_manager = ConfigManager(config_path)
+        # Override config with CLI options if provided.
+        # "Provided" is detected by comparing against each option's declared
+        # default, so explicitly passing the default value on the command
+        # line does not override a different value from the config file.
+        if compression != 'deflate':
+            config_manager.config['compression'] = compression
+        if zlevel != 6:
+            config_manager.config['zlevel'] = zlevel
+        if block_size != 256:
+            # The single CLI integer is stored as a square [n, n] pair.
+            config_manager.config['block_size'] = [block_size, block_size]
+        if resampling != 'nearest':
+            # Indexes config['overviews'] directly: a config without that
+            # mapping raises KeyError, reported by the generic handler below.
+            config_manager.config['overviews']['resampling'] = resampling
+        if tile_size != 512:
+            config_manager.config['tile_size'] = [tile_size, tile_size]
+        # Parse overview levels from comma-separated string and convert to list of ints
+        # (a non-integer entry raises ValueError, reported by the generic handler below)
+        if overview_levels != '2,4,8,16':
+            levels_list = [int(x.strip()) for x in overview_levels.split(',')]
+            config_manager.config['overviews']['levels'] = levels_list
+        if overwrite:
+            config_manager.config['overwrite'] = True
+        # Handle projection parameters
+        # (the 'projection' mapping is created on demand when the config lacks one)
+        if dst_proj:
+            config_manager.config['projection'] = config_manager.config.get('projection', {})
+            config_manager.config['projection']['target'] = dst_proj
+        if src_proj:
+            config_manager.config['projection'] = config_manager.config.get('projection', {})
+            config_manager.config['projection']['source'] = src_proj
+        # Handle metadata source parameter
+        if metadata_source:
+            config_manager.config['metadata'] = config_manager.config.get('metadata', {})
+            config_manager.config['metadata']['source'] = metadata_source
+        # Validate configuration (after the CLI overrides have been applied)
+        config_manager.validate()
+        # Setup processing engine
+        engine = ProcessingEngine(config_manager)
+        # Setup file discovery
+        input_path_obj = Path(input_path)
+        output_path_obj = Path(output_path)
+        discovery = FileDiscovery(input_path_obj)
+        # Detect single-file mode: input is a file AND output path ends with .tif
+        # The suffix test is case-sensitive and exact, so an output such as
+        # 'out.TIF' or 'out.tiff' is used as a directory path instead.
+        single_file_mode = input_path_obj.is_file() and str(output_path).endswith('.tif')
+        # Detect multi-dimensional NC: input is a file with GDAL subdatasets
+        multi_dim_mode = False
+        if input_path_obj.is_file():
+            analyzer = NCAnalyzer(input_path_obj)
+            subdatasets = analyzer.get_subdatasets()
+            if len(subdatasets) > 0:
+                multi_dim_mode = True
+                # variables_list is bound only in this branch; every later
+                # use is guarded by multi_dim_mode. --variables takes
+                # precedence over the analyzer's own variable list.
+                variables_list = [v.strip() for v in variables.split(',')] if variables else analyzer.get_data_variables()
+                if not variables_list:
+                    logger.info("No data variables found in the netCDF file")
+                    return
+        # Find all netCDF files
+        all_files = discovery.find_files()
+        logger.info(f"Found {len(all_files)} netCDF files to process")
+        # --resume is ignored in single-file mode.
+        if not single_file_mode and resume:
+            files_to_process = discovery.get_resume_state(output_path_obj, all_files)
+            logger.info(f"After resume check, {len(files_to_process)} files still need processing")
+        else:
+            files_to_process = all_files
+        # Dry run: log the planned input -> output mapping and stop before
+        # any conversion is attempted.
+        if dry_run:
+            if multi_dim_mode:
+                logger.info("Dry run mode - multi-dimensional file detected:")
+                logger.info(f"  Input: {input_path_obj}")
+                if single_file_mode:
+                    # Only the first variable is written to a single .tif.
+                    logger.info(f"  Output: {output_path_obj} (variable: {variables_list[0]})")
+                else:
+                    logger.info(f"  Variables: {', '.join(variables_list)}")
+                    for var_name in variables_list:
+                        logger.info(f"    {var_name} -> {output_path_obj / f'{var_name}.tif'}")
+            else:
+                logger.info("Dry run mode - would process:")
+                for f in files_to_process:
+                    # Same output-path rules as the conversion loop below.
+                    if single_file_mode:
+                        out_file = Path(output_path)
+                    elif input_path_obj.is_file():
+                        out_file = output_path_obj / input_path_obj.with_suffix('.tif').name
+                    else:
+                        relative_path = f.relative_to(input_path_obj)
+                        out_file = output_path_obj / relative_path.with_suffix('.tif')
+                    logger.info(f"  {f} -> {out_file}")
+            return
+        if not files_to_process:
+            logger.info("No files to process")
+            return
+        # Multi-dimensional NC processing
+        if multi_dim_mode:
+            logger.info(f"Multi-dimensional mode: converting variables: {', '.join(variables_list)}")
+            start_time = time.time()
+            if single_file_mode:
+                # Direct file output (single variable to specified .tif)
+                # The return value is not inspected: the run counts as one
+                # success unless the call raises.
+                engine.convert_multiband_file(input_path_obj, output_path_obj, variables_list[0])
+                successful = 1
+                failed = 0
+            else:
+                # Directory output (all variables)
+                # `results` is consumed as a mapping whose truthy values
+                # count as successes and falsy values as failures.
+                results = engine.convert_multiband(input_path_obj, output_path_obj, variables_list)
+                successful = sum(1 for v in results.values() if v)
+                failed = sum(1 for v in results.values() if not v)
+            elapsed_time = time.time() - start_time
+            logger.info(f"\nProcessing complete!")
+            logger.info(f"Successful: {successful}")
+            logger.info(f"Failed: {failed}")
+            logger.info(f"Total: {successful + failed}")
+            logger.info(f"Elapsed time: {elapsed_time:.2f} seconds")
+            if failed > 0:
+                # SystemExit is not an Exception subclass, so it is not
+                # intercepted by the handlers at the end of this function.
+                sys.exit(1)
+            return
+        # Process files
+        successful = 0
+        failed = 0
+        start_time = time.time()
+        logger.info("Starting conversion process...")
+        for i, input_file in enumerate(files_to_process):
+            try:
+                # Generate output file path
+                if single_file_mode:
+                    output_file = Path(output_path)
+                elif input_path_obj.is_file():
+                    output_file = output_path_obj / input_path_obj.with_suffix('.tif').name
+                else:
+                    # Directory input: mirror the file's relative location
+                    # under the output directory.
+                    relative_path = input_file.relative_to(input_path_obj)
+                    output_file = output_path_obj / relative_path.with_suffix('.tif')
+                # Skip if file exists and overwrite is not enabled
+                # (skipped files count as neither successful nor failed)
+                if output_file.exists() and not overwrite:
+                    logger.warning(f"Output file exists, skipping: {output_file}")
+                    continue
+                logger.info(f"[{i+1}/{len(files_to_process)}] Processing: {input_file.name}")
+                # Validate input file
+                engine.validate_input(input_file)
+                # Convert the file
+                result = engine.convert_file(input_file, output_file)
+                if result:
+                    successful += 1
+                    logger.info(f"  ✓ Completed: {output_file.name}")
+                else:
+                    failed += 1
+                    logger.error(f"  ✗ Failed: {input_file.name}")
+            except Exception as e:
+                failed += 1
+                logger.error(f"  ✗ Failed to process {input_file.name}: {str(e)}")
+                # Continue with other files if skip_errors is enabled
+                # (the default); otherwise the re-raised exception is handled
+                # by the outer handlers, which log it and exit with status 1.
+                if not config_manager.get('skip_errors', True):
+                    raise
+        # Print summary
+        elapsed_time = time.time() - start_time
+        logger.info(f"\nProcessing complete!")
+        logger.info(f"Successful: {successful}")
+        logger.info(f"Failed: {failed}")
+        logger.info(f"Total: {successful + failed}")
+        logger.info(f"Elapsed time: {elapsed_time:.2f} seconds")
+        if failed > 0:
+            sys.exit(1)
+    # Every handled failure is logged and converted to exit status 1.
+    except NC2COGError as e:
+        logger.error(f"NC2COG Error: {str(e)}")
+        sys.exit(1)
+    except KeyboardInterrupt:
+        logger.info("\nProcessing interrupted by user")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Unexpected error: {str(e)}")
+        sys.exit(1)
+# Allow running this module directly as a script.
+if __name__ == '__main__':
+    main()

nc2cog/config.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""Configuration management for netCDF to COG TIFF converter."""
+import yaml
+from pathlib import Path
+from typing import Dict, Any, Optional
+from .errors import ConfigError
+class ConfigManager:
+    """
+    Manages configuration for the netCDF to COG TIFF converter.
+    The effective configuration is the default configuration with an optional
+    user configuration file merged over it. Values are read with get() using
+    dot notation and checked with validate().
+    """
+    def __init__(self, config_path: Optional[Path] = None):
+        """
+        Initialize configuration manager.
+        Args:
+            config_path: Optional path to config file
+        Raises:
+            ConfigError: If a configuration file is missing (user file only),
+                is not valid YAML, or does not contain a mapping.
+        """
+        self.default_config_path = Path(__file__).parent.parent.parent / "config" / "default_config.yaml"
+        self.user_config_path = config_path
+        # Load and merge configurations
+        self._config = self._load_default_config()
+        if config_path:
+            user_config = self._load_user_config(config_path)
+            self._config = self._merge_configs(self._config, user_config)
+    @staticmethod
+    def _builtin_defaults() -> Dict[str, Any]:
+        """
+        Return a fresh copy of the built-in fallback configuration.
+        The values are the same defaults validate() assumes for missing keys,
+        plus the default overview levels.
+        NOTE(review): confirm these match the project's default_config.yaml.
+        """
+        return {
+            'compression': 'deflate',
+            'zlevel': 6,
+            'block_size': [256, 256],
+            'tile_size': [512, 512],
+            'overviews': {
+                'resampling': 'nearest',
+                'levels': [2, 4, 8, 16],
+            },
+        }
+    def _read_yaml_mapping(self, path: Path, invalid_yaml_prefix: str) -> Dict[str, Any]:
+        """
+        Read a YAML file and return its top-level mapping.
+        Args:
+            path: File to read
+            invalid_yaml_prefix: Leading text of the error raised on bad YAML
+        Returns:
+            The parsed mapping; an empty document yields an empty dict
+        Raises:
+            ConfigError: If the YAML is invalid or its top level is not a mapping
+        """
+        # YAML is read as UTF-8 explicitly so parsing does not depend on the
+        # platform's default encoding.
+        with open(path, 'r', encoding='utf-8') as f:
+            try:
+                data = yaml.safe_load(f) or {}
+            except yaml.YAMLError as e:
+                raise ConfigError(f"{invalid_yaml_prefix}: {e}") from e
+        if not isinstance(data, dict):
+            # A list or scalar document would otherwise fail later with an
+            # AttributeError during merging.
+            raise ConfigError(f"Configuration file must contain a mapping at the top level: {path}")
+        return data
+    def _load_default_config(self) -> Dict[str, Any]:
+        """
+        Load default configuration from file.
+        Falls back to the built-in defaults when the file does not exist,
+        which is the case when the package directory is not accompanied by a
+        ``config/default_config.yaml`` three levels above this module.
+        """
+        if not self.default_config_path.exists():
+            return self._builtin_defaults()
+        return self._read_yaml_mapping(self.default_config_path, "Invalid YAML in default config")
+    def _load_user_config(self, config_path: Path) -> Dict[str, Any]:
+        """
+        Load user configuration from file.
+        Raises:
+            ConfigError: If the file is missing, invalid YAML, or not a mapping
+        """
+        if not config_path.exists():
+            raise ConfigError(f"Configuration file not found: {config_path}")
+        return self._read_yaml_mapping(config_path, f"Invalid YAML in config file: {config_path}")
+    def _merge_configs(self, default: Dict[str, Any], user: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Recursively merge user config into default config.
+        Nested mappings present on both sides are merged key by key; any other
+        user value replaces the default. The input dicts are not modified at
+        the levels that are merged.
+        """
+        result = default.copy()
+        for key, value in user.items():
+            if isinstance(value, dict) and isinstance(result.get(key), dict):
+                result[key] = self._merge_configs(result[key], value)
+            else:
+                result[key] = value
+        return result
+    @property
+    def config(self) -> Dict[str, Any]:
+        """Get the merged configuration (the live dict, not a copy)."""
+        return self._config
+    def get(self, key: str, default: Any = None) -> Any:
+        """
+        Get a configuration value using dot notation.
+        Args:
+            key: Configuration key using dot notation (e.g., 'processing.compression')
+            default: Default value if key is not found
+        Returns:
+            Configuration value or default
+        """
+        value = self._config
+        for part in key.split('.'):
+            if not isinstance(value, dict) or part not in value:
+                return default
+            value = value[part]
+        return value
+    def _validate_size_pair(self, key: str, fallback: list) -> None:
+        """Check that config[key] is a list of two positive integers."""
+        pair = self.get(key, fallback)
+        if not isinstance(pair, list) or len(pair) != 2:
+            raise ConfigError(f"Invalid {key}: {pair}. Must be a list of two integers.")
+        label = key.replace('_', ' ')
+        for size in pair:
+            if not isinstance(size, int) or size <= 0:
+                raise ConfigError(f"Invalid {label}: {size}. Must be a positive integer.")
+    def _validate_epsg(self, projection: Any, role: str) -> None:
+        """Check that an optional projection value looks like 'EPSG:<number>'."""
+        if projection is None:
+            return
+        if not isinstance(projection, str) or not projection.upper().startswith('EPSG:'):
+            raise ConfigError(f"Invalid {role} projection format: {projection}. Expected format: 'EPSG:XXXX'")
+        try:
+            int(projection.split(':')[1])  # Verify it's a valid integer
+        except (IndexError, ValueError) as e:
+            raise ConfigError(
+                f"Invalid {role} EPSG code: {projection}. Expected format: 'EPSG:XXXX' where XXXX is a number"
+            ) from e
+    def validate(self) -> None:
+        """
+        Validate configuration values.
+        Checks compression, tile_size, block_size, zlevel, the overview
+        resampling method, the optional source/target projections and the
+        reprojection resampling method. Missing keys are treated as their
+        defaults.
+        Raises:
+            ConfigError: On the first invalid value found
+        """
+        # Compression type validation
+        compression = self.get('compression', 'deflate')
+        valid_compressions = ['deflate', 'lzw', 'jpeg']
+        if compression not in valid_compressions:
+            raise ConfigError(f"Invalid compression type: {compression}. Valid options: {valid_compressions}")
+        # Tile size and block size validation
+        self._validate_size_pair('tile_size', [512, 512])
+        self._validate_size_pair('block_size', [256, 256])
+        # Z-level validation
+        zlevel = self.get('zlevel', 6)
+        if not isinstance(zlevel, int) or zlevel < 1 or zlevel > 9:
+            raise ConfigError(f"Invalid zlevel: {zlevel}. Must be an integer between 1 and 9.")
+        # Overviews resampling validation; 'gauss' and 'rms' are accepted so
+        # every value offered by the --resampling command-line option passes.
+        resampling = self.get('overviews.resampling', 'nearest')
+        valid_resampling_methods = [
+            'nearest', 'bilinear', 'cubic', 'cubicspline', 'lanczos',
+            'average', 'mode', 'gauss', 'rms',
+        ]
+        if resampling not in valid_resampling_methods:
+            raise ConfigError(f"Invalid resampling method: {resampling}. Valid options: {valid_resampling_methods}")
+        # Projection parameters validation
+        self._validate_epsg(self.get('projection.source', None), 'source')
+        self._validate_epsg(self.get('projection.target', None), 'target')
+        # Validate reprojection resampling method
+        resampling_method = self.get('projection.resampling_method', 'nearest')
+        valid_reprojection_methods = ['nearest', 'bilinear', 'cubic', 'cubicspline', 'lanczos', 'average', 'mode', 'max', 'min', 'med', 'q1', 'q3']
+        if resampling_method not in valid_reprojection_methods:
+            raise ConfigError(f"Invalid reprojection resampling method: {resampling_method}. Valid options: {valid_reprojection_methods}")