PyPI - eotdl - Versions diffs - 2024.10.7__py3-none-any.whl → 2025.3.25__py3-none-any.whl - Mend

eotdl 2024.10.7__py3-none-any.whl → 2025.3.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56)

eotdl/__init__.py +1 -1
eotdl/access/search.py +0 -2
eotdl/access/sentinelhub/parameters.py +1 -1
eotdl/cli.py +2 -2
eotdl/commands/datasets.py +28 -31
eotdl/commands/models.py +27 -30
eotdl/commands/stac.py +57 -0
eotdl/curation/__init__.py +0 -8
eotdl/curation/stac/__init__.py +1 -8
eotdl/curation/stac/api.py +58 -0
eotdl/curation/stac/stac.py +31 -341
eotdl/datasets/__init__.py +1 -1
eotdl/datasets/ingest.py +28 -159
eotdl/datasets/retrieve.py +0 -9
eotdl/datasets/stage.py +64 -0
eotdl/files/__init__.py +0 -2
eotdl/files/ingest.bck +178 -0
eotdl/files/ingest.py +229 -164
eotdl/{datasets → files}/metadata.py +16 -17
eotdl/models/__init__.py +1 -1
eotdl/models/ingest.py +28 -159
eotdl/models/stage.py +60 -0
eotdl/repos/APIRepo.py +1 -1
eotdl/repos/DatasetsAPIRepo.py +56 -43
eotdl/repos/FilesAPIRepo.py +260 -167
eotdl/repos/STACAPIRepo.py +40 -0
eotdl/repos/__init__.py +1 -0
eotdl/tools/geo_utils.py +7 -2
{eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/METADATA +5 -4
eotdl-2025.3.25.dist-info/RECORD +65 -0
{eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/WHEEL +1 -1
eotdl/curation/stac/assets.py +0 -110
eotdl/curation/stac/dataframe.py +0 -172
eotdl/curation/stac/dataframe_bck.py +0 -253
eotdl/curation/stac/dataframe_labeling.py +0 -63
eotdl/curation/stac/extensions/__init__.py +0 -23
eotdl/curation/stac/extensions/base.py +0 -30
eotdl/curation/stac/extensions/dem.py +0 -18
eotdl/curation/stac/extensions/eo.py +0 -117
eotdl/curation/stac/extensions/label/__init__.py +0 -7
eotdl/curation/stac/extensions/label/base.py +0 -136
eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -203
eotdl/curation/stac/extensions/label/scaneo.py +0 -219
eotdl/curation/stac/extensions/ml_dataset.py +0 -648
eotdl/curation/stac/extensions/projection.py +0 -44
eotdl/curation/stac/extensions/raster.py +0 -53
eotdl/curation/stac/extensions/sar.py +0 -55
eotdl/curation/stac/extent.py +0 -158
eotdl/curation/stac/parsers.py +0 -61
eotdl/datasets/download.py +0 -104
eotdl/files/list_files.py +0 -13
eotdl/models/download.py +0 -101
eotdl/models/metadata.py +0 -43
eotdl/wrappers/utils.py +0 -35
eotdl-2024.10.7.dist-info/RECORD +0 -82
{eotdl-2024.10.7.dist-info → eotdl-2025.3.25.dist-info}/entry_points.txt +0 -0

eotdl/curation/stac/assets.py DELETED Viewed

@@ -1,110 +0,0 @@
-"""
-Module for STAC Asset Generators
-"""
-from os import remove, listdir
-from os.path import dirname, join, basename, abspath, basename
-import pandas as pd
-import rasterio
-import pystac
-from ...tools.metadata import remove_raster_metadata
-MEDIA_TYPES_DICT = {
-    "tif": pystac.MediaType.GEOTIFF,
-    "tiff": pystac.MediaType.GEOTIFF,
-    "png": pystac.MediaType.PNG,
-    "jpg": pystac.MediaType.JPEG,
-    "jpeg": pystac.MediaType.JPEG,
-}
-class STACAssetGenerator:
-    """
-    Standard STAC Asset Generator
-    """
-    type = "None"
-    def __init__(self):
-        pass
-    @classmethod
-    def extract_assets(cls, obj_info: pd.DataFrame):
-        """
-        Generate a single asset from the raster file
-        :param raster_path: path to the raster file
-        """
-        # If there is no bands, create a single band asset from the file, assuming thats a singleband raster
-        raster_path = obj_info["image"].values[0]
-        title = basename(raster_path).split(".")[0]
-        # Get the file extension
-        raster_format = raster_path.split(".")[-1]
-        asset = pystac.Asset(
-            href=abspath(raster_path),
-            title=title,
-            media_type=MEDIA_TYPES_DICT[raster_format],
-            roles=["data"],
-        )
-        return [asset]
-class BandsAssetGenerator(STACAssetGenerator):
-    """
-    Bands STAC Asset Generator
-    """
-    type = "Bands"
-    def __init__(self) -> None:
-        super().__init__()
-    def extract_assets(self, obj_info: pd.DataFrame):
-        """
-        Extract the assets from the raster file from the bands column
-        :param raster_path: path to the raster file
-        """
-        asset_list = []
-        # File path
-        raster_path = obj_info["image"].values[0]
-        # Bands
-        bands = obj_info["bands"].values
-        bands = bands[0] if bands else None
-        if bands:
-            with rasterio.open(raster_path, "r") as raster:
-                raster_name = basename(raster_path).split(".")[0]
-                if isinstance(bands, str):
-                    bands = [bands]
-                for band in bands:
-                    i = bands.index(band)
-                    raster_format = raster_path.split(".")[
-                        -1
-                    ]  # Will be used later to save the bands files
-                    try:
-                        single_band = raster.read(i + 1)
-                    except IndexError:
-                        single_band = raster.read(1)
-                    band_name = f"{raster_name}_{band}.{raster_format}"
-                    output_band = join(dirname(raster_path), band_name)
-                    # Copy the metadata
-                    metadata = raster.meta.copy()
-                    metadata.update({"count": 1})
-                    # Write the band to the output folder
-                    with rasterio.open(output_band, "w", **metadata) as dest:
-                        dest.write(single_band, 1)
-                    # Instantiate pystac asset and append it to the list
-                    asset_list.append(
-                        pystac.Asset(
-                            href=output_band,
-                            title=band,
-                            media_type=MEDIA_TYPES_DICT[raster_format],
-                        )
-                    )
-            return asset_list

eotdl/curation/stac/dataframe.py DELETED Viewed

@@ -1,172 +0,0 @@
-"""
-Module for the STAC dataframe
-"""
-import json
-from os.path import join
-from os import makedirs
-from typing import Union, Optional
-from math import isnan
-from pathlib import Path
-import pandas as pd
-import geopandas as gpd
-import pystac
-from geomet import wkt
-from ...tools import convert_df_geom_to_shape, get_all_children
-class STACDataFrame(gpd.GeoDataFrame):
-    """
-    STACDataFrame class
-    """
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-    @classmethod
-    def from_stac_file(cls, stac_file: pystac.STACObject):
-        """
-        Create a STACDataFrame from a STAC file
-        :param stac_file: STAC file
-        """
-        return read_stac(stac_file)
-    def to_stac(self, path):
-        """
-        Create a STAC catalog and children from a STACDataFrame
-        """
-        df = self.copy()
-        if "id" in df.columns and "stac_id" in df.columns:
-            id_column = "stac_id"
-            stac_id_exists = True
-        else:
-            id_column = "id"
-            stac_id_exists = False
-        # First, create the catalog and its folder, if exists
-        catalog_df = df[df["type"] == "Catalog"]
-        if catalog_df.empty:
-            makedirs(path, exist_ok=True)
-        else:
-            for _, row in catalog_df.iterrows():
-                root_output_folder = path + "/" + row[id_column]
-                makedirs(root_output_folder, exist_ok=True)
-                row_json = row.to_dict()
-                # Curate the json row
-                row_json = self.curate_json_row(row_json, stac_id_exists)
-                with open(
-                    join(root_output_folder, "catalog.json"), "w", encoding="utf-8"
-                ) as f:
-                    json.dump(row_json, f)
-        # Second, create the collections and their folders, if exist
-        collections = {}
-        collections_df = df[df["type"] == "Collection"]
-        for _, row in collections_df.iterrows():
-            stac_output_folder = join(root_output_folder, row[id_column])
-            collections[row[id_column]] = stac_output_folder
-            makedirs(stac_output_folder, exist_ok=True)
-            row_json = row.to_dict()
-            # Curate the json row
-            row_json = self.curate_json_row(row_json, stac_id_exists)
-            with open(
-                join(stac_output_folder, "collection.json"), "w", encoding="utf-8"
-            ) as f:
-                json.dump(row_json, f)
-        # Then, create the items and their folders, if exist
-        features_df = df[df["type"] == "Feature"]
-        for _, row in features_df.iterrows():
-            collection = row["collection"]
-            stac_output_folder = join(collections[collection], row[id_column])
-            # Convert the geometry from WKT back to geojson
-            row["geometry"] = row["geometry"].wkt
-            row["geometry"] = wkt.loads(row["geometry"])
-            makedirs(stac_output_folder, exist_ok=True)
-            row_json = row.to_dict()
-            # Curate the json row
-            row_json = self.curate_json_row(row_json, stac_id_exists)
-            with open(
-                join(stac_output_folder, f'{row_json["id"]}.json'),
-                "w",
-                encoding="utf-8",
-            ) as f:
-                json.dump(row_json, f)
-    def curate_json_row(self, row: dict, stac_id_exists: bool) -> dict:
-        """
-        Curate the json row of a STACDataFrame, in order to generate a valid STAC file
-        :param row: row of a STACDataFrame
-        :param stac_id_exists: if the stac_id column exists
-        """
-        keys_to_remove = []
-        # Remove the created_at and modified_at columns, if the STACDataFrame comes from GeoDB
-        for i in "created_at", "modified_at":
-            if i in row.keys():
-                keys_to_remove.append(i)
-        # Rename the stac_id column to id, to avoid conflicts with the id column
-        if stac_id_exists:
-            row["id"] = row["stac_id"]
-            del row["stac_id"]
-        # Remove the NaN values and empty strings
-        for k, v in row.items():
-            if (isinstance(v, float) and isnan(v)) or v == "" or not v:
-                keys_to_remove.append(k)
-        for key in keys_to_remove:
-            if key in row.keys():
-                del row[key]
-        # Convert the value to dict if it is a string and is possible
-        for k, v in row.items():
-            if isinstance(v, str):
-                try:
-                    row[k] = json.loads(v)
-                except json.decoder.JSONDecodeError:
-                    pass
-        return row
-def read_stac(
-    stac_file: Union[pystac.Catalog, pystac.Collection, str],
-    geometry_column: Optional[str] = "geometry",
-) -> STACDataFrame:
-    """
-    Read a STAC file and return a STACDataFrame
-    :param stac_file: STAC file to read
-    :param geometry_column: name of the geometry column
-    """
-    if isinstance(stac_file, (str, Path)):
-        stac_file = pystac.read_file(stac_file)  # we assume this is always a catalog
-    stac_file.make_all_asset_hrefs_absolute()
-    children = get_all_children(stac_file)
-    # Convert Dataframe to STACDataFrame
-    dataframe = pd.DataFrame(children)
-    dataframe[geometry_column] = dataframe.apply(convert_df_geom_to_shape, axis=1)
-    stac_dataframe = STACDataFrame(
-        dataframe,
-        crs="EPSG:4326",
-        geometry=gpd.GeoSeries.from_wkt(dataframe[geometry_column]),
-    )
-    return stac_dataframe

eotdl/curation/stac/dataframe_bck.py DELETED Viewed

@@ -1,253 +0,0 @@
-"""
-Module for the STAC dataframe
-"""
-import pandas as pd
-import geopandas as gpd
-import pystac
-import json
-import os
-from xcube_geodb.core.geodb import GeoDBClient
-from geomet import wkt
-from os.path import join
-from os import makedirs
-from math import isnan
-from .utils import convert_df_geom_to_shape, get_all_children
-class STACDataFrame(gpd.GeoDataFrame):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-    @classmethod
-    def from_stac_file(self, stac_file):
-        """
-        Create a STACDataFrame from a STAC file
-        """
-        return read_stac(stac_file)
-    @classmethod
-    def from_geodb(
-        self,
-        server_url: str,
-        server_port: int | str,
-        client_id: str,
-        client_secret: str,
-        auth_aud: str,
-        collection: str,
-        database: str = None,
-    ):
-        """
-        Create a STACDataFrame from a GeoDB collection
-        :param server_url: GeoDB server url
-        :param server_port: GeoDB server port
-        :param client_id: GeoDB client id
-        :param client_secret: GeoDB client secret
-        :param auth_aud: GeoDB auth aud
-        :param collection: GeoDB collection
-        :param database: GeoDB database
-        """
-        geodb_client = GeoDBClient(
-            server_url=server_url,
-            server_port=server_port,
-            client_id=client_id,
-            client_secret=client_secret,
-            auth_aud=auth_aud,
-        )
-        data = geodb_client.get_collection(collection, database=database)
-        return STACDataFrame(data, crs="EPSG:4326")
-    def ingest(
-        self,
-        collection: str,
-        server_url: str = os.environ["SERVER_URL"],
-        server_port: int = os.environ["SERVER_PORT"],
-        client_id: str = os.environ["CLIENT_ID"],
-        client_secret: str = os.environ["CLIENT_SECRET"],
-        auth_aud: str = os.environ["AUTH_DOMAIN"],
-        database: str = None,
-    ):
-        """
-        Create a GeoDB collection from a STACDataFrame
-        :param collection: dataset name (GeoDB collection)
-        :param server_url: GeoDB server url
-        :param server_port: GeoDB server port
-        :param client_id: GeoDB client id
-        :param client_secret: GeoDB client secret
-        :param auth_aud: GeoDB auth aud
-        :param database: GeoDB database
-        """
-        geodb_client = GeoDBClient(
-            server_url=server_url,
-            server_port=server_port,
-            client_id=client_id,
-            client_secret=client_secret,
-            auth_aud=auth_aud,
-        )
-        # TODO: check name is unique (use eotdl-cli)
-        # TODO: ingest assets (only if local)
-        # TODO: rename assets in the dataframe with URLs (only if local)
-        # ingest to geodb
-        # Check if the collection already exists
-        if geodb_client.collection_exists(collection, database=database):
-            # geodb_client.drop_collection(collection, database=database)
-            raise Exception(f"Collection {collection} already exists")
-        # Rename the column id to stac_id, to avoid conflicts with the id column
-        self.rename(columns={"id": "stac_id"}, inplace=True)
-        # Fill the NaN with '' to avoid errors, except in the geometry column
-        copy = self.copy()
-        columns_to_fill = copy.columns.drop("geometry")
-        self[columns_to_fill] = self[columns_to_fill].fillna("")
-        # Create the collection if it does not exist
-        # and insert the data
-        collections = {collection: self._create_collection_structure(self.columns)}
-        geodb_client.create_collections(collections, database=database)
-        geodb_client.insert_into_collection(collection, database=database, values=self)
-        # TODO: save data in eotdl
-    def _create_collection_structure(self, columns: list) -> dict:
-        """
-        Create the schema structure of a GeoDB collection from a STACDataFrame
-        :param columns: columns of the STACDataFrame
-        """
-        stac_collection = {"crs": 4326, "properties": {}}
-        for column in columns:
-            if column not in ("geometry", "id"):
-                stac_collection["properties"][column] = "json"
-        return stac_collection
-    def to_stac(self):
-        """
-        Create a STAC catalog and children from a STACDataFrame
-        """
-        df = self.copy()
-        if "id" in df.columns and "stac_id" in df.columns:
-            id_column = "stac_id"
-            stac_id_exists = True
-        else:
-            id_column = "id"
-            stac_id_exists = False
-        # First, create the catalog and its folder, if exists
-        catalog_df = df[df["type"] == "Catalog"]
-        if catalog_df.empty:
-            root_output_folder = "output"
-            makedirs(root_output_folder, exist_ok=True)
-        else:
-            for index, row in catalog_df.iterrows():
-                root_output_folder = row[id_column]
-                makedirs(root_output_folder, exist_ok=True)
-                row_json = row.to_dict()
-                # Curate the json row
-                row_json = self.curate_json_row(row_json, stac_id_exists)
-                with open(join(root_output_folder, "catalog.json"), "w") as f:
-                    json.dump(row_json, f)
-        # Second, create the collections and their folders, if exist
-        collections = dict()
-        collections_df = df[df["type"] == "Collection"]
-        for index, row in collections_df.iterrows():
-            stac_output_folder = join(root_output_folder, row[id_column])
-            collections[row[id_column]] = stac_output_folder
-            makedirs(stac_output_folder, exist_ok=True)
-            row_json = row.to_dict()
-            # Curate the json row
-            row_json = self.curate_json_row(row_json, stac_id_exists)
-            with open(join(stac_output_folder, "collection.json"), "w") as f:
-                json.dump(row_json, f)
-        # Then, create the items and their folders, if exist
-        features_df = df[df["type"] == "Feature"]
-        for index, row in features_df.iterrows():
-            collection = row["collection"]
-            stac_output_folder = join(collections[collection], row[id_column])
-            # Convert the geometry from WKT back to geojson
-            row["geometry"] = row["geometry"].wkt
-            row["geometry"] = wkt.loads(row["geometry"])
-            makedirs(stac_output_folder, exist_ok=True)
-            row_json = row.to_dict()
-            # Curate the json row
-            row_json = self.curate_json_row(row_json, stac_id_exists)
-            with open(join(stac_output_folder, f'{row_json["id"]}.json'), "w") as f:
-                json.dump(row_json, f)
-    def curate_json_row(self, row: dict, stac_id_exists: bool) -> dict:
-        """
-        Curate the json row of a STACDataFrame, in order to generate a valid STAC file
-        :param row: row of a STACDataFrame
-        :param stac_id_exists: if the stac_id column exists
-        """
-        keys_to_remove = list()
-        # Remove the created_at and modified_at columns, if the STACDataFrame comes from GeoDB
-        for i in "created_at", "modified_at":
-            if i in row.keys():
-                keys_to_remove.append(i)
-        # Rename the stac_id column to id, to avoid conflicts with the id column
-        if stac_id_exists:
-            row["id"] = row["stac_id"]
-            del row["stac_id"]
-        # Remove the NaN values and empty strings
-        for k, v in row.items():
-            if (isinstance(v, float) and isnan(v)) or v == "":
-                keys_to_remove.append(k)
-        for key in keys_to_remove:
-            del row[key]
-        del row["geometry"]
-        return row
-def read_stac(
-    stac_file: pystac.Catalog | pystac.Collection | str,
-    geometry_column: str = "geometry",
-) -> STACDataFrame:
-    """
-    Read a STAC file and return a STACDataFrame
-    :param stac_file: STAC file to read
-    :param geometry_column: name of the geometry column
-    """
-    if isinstance(stac_file, str):
-        stac_file = pystac.read_file(stac_file)
-    children = get_all_children(stac_file)
-    # Convert Dataframe to STACDataFrame
-    dataframe = pd.DataFrame(children)
-    dataframe[geometry_column] = dataframe.apply(convert_df_geom_to_shape, axis=1)
-    stac_dataframe = STACDataFrame(
-        dataframe,
-        crs="EPSG:4326",
-        geometry=gpd.GeoSeries.from_wkt(dataframe[geometry_column]),
-    )
-    return stac_dataframe

eotdl/curation/stac/dataframe_labeling.py DELETED Viewed

@@ -1,63 +0,0 @@
-"""
-Module for the labeling strategy when creating a STAC catalog from a dataframe
-"""
-from os.path import basename
-class LabelingStrategy:
-    """
-    Labeling strategy interface to be implemented by concrete labeling strategies
-    """
-    def get_images_labels(self, images):
-        """
-        Get the labels of the images
-        """
-        return
-class UnlabeledStrategy(LabelingStrategy):
-    """
-    Assumes the images are not labeled, and returns the entire filename as label
-    """
-    def __init__(self):
-        super().__init__()
-    def get_images_labels(self, images):
-        """
-        Get the labels of the images
-        """
-        labels = []
-        for image in images:
-            labels.append(basename(image).split(".")[0])
-        ixs = [labels.index(x) for x in labels]
-        return labels, ixs
-class LabeledStrategy(LabelingStrategy):
-    """
-    Assumes the images are already labeled, and returns the labels.
-    The images filenames must follow the pattern: <label>_<id>.<ext>
-    """
-    def __init__(self):
-        super().__init__()
-    def get_images_labels(self, images):
-        """
-        Get the labels of the images
-        """
-        labels = []
-        for image in images:
-            image_basename = basename(image).split(".")[
-                0
-            ]  # Get filename without extension
-            label = image_basename.split("_")[0]
-            labels.append(label)
-        ixs = [labels.index(x) for x in labels]
-        return labels, ixs

eotdl/curation/stac/extensions/__init__.py DELETED Viewed

@@ -1,23 +0,0 @@
-"""
-STAC extensions module
-"""
-from .sar import SarExtensionObject
-from .raster import RasterExtensionObject
-from .projection import ProjExtensionObject
-from .dem import DEMExtensionObject
-from .eo import EOS2ExtensionObject
-from .label import LabelExtensionObject, ImageNameLabeler, ScaneoLabeler
-from .ml_dataset import add_ml_extension, MLDatasetQualityMetrics
-SUPPORTED_EXTENSIONS = ("eo", "sar", "proj", "raster")
-type_stac_extensions_dict = {
-    "sar": SarExtensionObject(),
-    "eo": EOS2ExtensionObject(),
-    "dem": DEMExtensionObject(),
-    "raster": RasterExtensionObject(),
-    "proj": ProjExtensionObject(),
-}

eotdl/curation/stac/extensions/base.py DELETED Viewed

@@ -1,30 +0,0 @@
-"""
-Module for STAC extensions objects
-"""
-from typing import Optional, Union
-import pystac
-import pandas as pd
-class STACExtensionObject:
-    """
-    Base model for STAC extensions objects
-    """
-    def __init__(self) -> None:
-        super().__init__()
-        self.properties = {}
-    def add_extension_to_object(
-        self,
-        obj: Union[pystac.Item, pystac.Asset],
-        obj_info: Optional[pd.DataFrame] = None,
-    ) -> Union[pystac.Item, pystac.Asset]:
-        """
-        Add the extension to the given object
-        :param obj: object to add the extension
-        :param obj_info: object info from the STACDataFrame
-        """
-        return

eotdl/curation/stac/extensions/dem.py DELETED Viewed

@@ -1,18 +0,0 @@
-"""
-Module for DEM STAC extensions object
-"""
-from .base import STACExtensionObject
-class DEMExtensionObject(STACExtensionObject):
-    """
-    DEM STAC extension object
-    """
-    DEM_DATE_ACQUIRED = {
-        "start_datetime": "2011-01-01T00:00:00Z",
-        "end_datetime": "2015-01-07T00:00:00Z",
-    }
-    def __init__(self) -> None:
-        super().__init__()

eotdl 2024.10.7__py3-none-any.whl → 2025.3.25__py3-none-any.whl

eotdl 2024.10.7__py3-none-any.whl → 2025.3.25__py3-none-any.whl