PyPI - eotdl - Versions diffs - 2023.10.25.post10__py3-none-any.whl → 2023.11.2.post2__py3-none-any.whl - Mend

eotdl 2023.10.25.post10__py3-none-any.whl → 2023.11.2.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

eotdl/__init__.py +1 -1
eotdl/cli.py +6 -2
eotdl/commands/auth.py +18 -1
eotdl/commands/datasets.py +61 -11
eotdl/commands/models.py +108 -0
eotdl/curation/__init__.py +1 -4
eotdl/curation/stac/assets.py +2 -1
eotdl/curation/stac/dataframe.py +1 -1
eotdl/curation/stac/extensions/label/image_name_labeler.py +6 -5
eotdl/curation/stac/extensions/ml_dataset.py +15 -25
eotdl/curation/stac/extent.py +1 -1
eotdl/curation/stac/stac.py +1 -1
eotdl/datasets/download.py +5 -4
eotdl/datasets/ingest.py +25 -154
eotdl/datasets/retrieve.py +1 -1
eotdl/files/__init__.py +1 -0
eotdl/files/ingest.py +175 -0
eotdl/models/__init__.py +3 -0
eotdl/models/download.py +119 -0
eotdl/models/ingest.py +47 -0
eotdl/models/metadata.py +16 -0
eotdl/models/retrieve.py +26 -0
eotdl/repos/FilesAPIRepo.py +136 -95
eotdl/repos/ModelsAPIRepo.py +40 -0
eotdl/repos/__init__.py +1 -0
eotdl/shared/__init__.py +1 -0
eotdl/tools/__init__.py +5 -6
eotdl/tools/geo_utils.py +15 -1
eotdl/tools/stac.py +144 -8
eotdl/tools/time_utils.py +19 -6
eotdl/tools/tools.py +2 -3
{eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/METADATA +1 -1
{eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/RECORD +38 -35
eotdl/curation/folder_formatters/__init__.py +0 -1
eotdl/curation/folder_formatters/base.py +0 -19
eotdl/curation/folder_formatters/sentinel_hub.py +0 -135
eotdl/curation/stac/utils/__init__.py +0 -5
eotdl/curation/stac/utils/geometry.py +0 -22
eotdl/curation/stac/utils/stac.py +0 -143
eotdl/curation/stac/utils/time.py +0 -21
/eotdl/{datasets/utils.py → shared/checksum.py} +0 -0
/eotdl/{curation/stac/utils → tools}/metadata.py +0 -0
/eotdl/{curation/stac/utils → tools}/paths.py +0 -0
{eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/WHEEL +0 -0
{eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/entry_points.txt +0 -0

eotdl/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "2023.10.25-10"
1	+ __version__ = "2023.11.02-2"

eotdl/cli.py CHANGED Viewed

@@ -1,15 +1,19 @@
 import typer
-from .commands import auth, datasets
+from .commands import auth, datasets, models
 from . import __version__
-app = typer.Typer()
+app = typer.Typer(help='EOTDL command line interface.')
 app.add_typer(auth.app, name="auth")
 app.add_typer(datasets.app, name="datasets")
+app.add_typer(models.app, name="models")
 @app.command()
 def version():
+    """
+    Get EOTDL version.
+    """
     typer.echo(f"EOTDL Version: {__version__}")

eotdl/commands/auth.py CHANGED Viewed

@@ -2,11 +2,23 @@ import typer
 from ..auth import is_logged, auth, logout_user
 from ..auth.errors import LoginError
-app = typer.Typer()
+app = typer.Typer(help="EOTDL CLI authentication module.")
 @app.command()
 def login():
+    """
+    Login to the EOTDL.
+    This command will return a URL that you can visit to authenticate.
+    After authentication, your credentials will be stored locally.
+    This enables future commands to be executed without having to authenticate again
+    (at least while the credentials are valid).
+    \n\n
+    The default browser will be opened automatically. If not, you can copy the URL and paste it in your browser.
+    \n\n
+    By default, the credentials will be stored in the following file: ~/.eotdl/credentials.json
+    """
     try:
         user = auth()
         typer.echo(f"You are logged in as {user['email']}")
@@ -17,6 +29,11 @@ def login():
 @app.command()
 def logout():
+    """
+    Logout from the EOTDL.
+    You will receive a logout url that you can visit in case you want to authenticate with a different account.
+    """
     user = is_logged()
     if user:
         typer.echo(f"You are logged in as {user['email']}")

eotdl/commands/datasets.py CHANGED Viewed

@@ -7,14 +7,25 @@ from ..datasets import (
     download_dataset,
 )
-app = typer.Typer()
+app = typer.Typer(help="EOTDL CLI datasets module.")
 @app.command()
 def list(
-    name: str = typer.Option(None, "--name", "-n", help="Filter by name"),
-    limit: int = typer.Option(None, "--limit", "-l", help="Limit number of results"),
+    name: str = typer.Option(None, "--name", "-n", help="Filter the returned datasets by name"),
+    limit: int = typer.Option(None, "--limit", "-l", help="Limit the number of returned results"),
 ):
+    """
+    Retrieve a list with all the datasets in the EOTDL.
+    If using --name, it will filter the results by name. If no name is provided, it will return all the datasets.\n
+    If using --limit, it will limit the number of results. If no limit is provided, it will return all the datasets.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl datasets list\n
+    $ eotdl datasets list --name YourModel --limit 5
+    """
     try:
         datasets = retrieve_datasets(name, limit)
         typer.echo(datasets)
@@ -24,9 +35,33 @@ def list(
 @app.command()
 def ingest(
-    path: Path = typer.Option(..., "--path", "-p", help="Path to dataset"),
-    verbose: bool = typer.Option(False, "--verbose", help="Verbose output"),
+    path: Path = typer.Option(..., "--path", "-p", help="Path to the dataset to ingest"),
+    verbose: bool = typer.Option(False, "--verbose", help="Verbose output. This will print the progress of the ingestion"),
 ):
+    """
+    Ingest a dataset to the EOTDL.
+    This command ingests the dataset to the EOTDL. The dataset must be a folder with the dataset files,
+    and at least a metadata.yml file or a catalog.json file. If there are not these files, the ingestion
+    will not work. All the files in the folder will be uploaded to the EOTDL.
+    \n\n
+    The following constraints apply to the dataset name:\n
+    - It must be unique\n
+    - It must be between 3 and 45 characters long\n
+    - It can only contain alphanumeric characters and dashes.\n
+    \n
+    The metadata.yml file must contain the following fields:\n
+    - name: the name of the dataset\n
+    - authors: the author or authors of the dataset\n
+    - license: the license of the dataset\n
+    - source: the source of the dataset\n
+    \n
+    If using --verbose, it will print the progress of the ingestion.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl dataset ingest --path /path/to/folder-with-dataset --verbose True
+    """
     try:
         ingest_dataset(path, verbose, typer.echo)
     except Exception as e:
@@ -35,19 +70,34 @@ def ingest(
 @app.command()
 def get(
-    dataset: str,
-    path: str = typer.Option(None, "--path", "-p", help="Download to a specific path"),
-    file: str = typer.Option(None, "--file", "-f", help="Download a specific file"),
+    dataset: str = typer.Argument(None, help="Name of the dataset to download"),
+    path: str = typer.Option(None, "--path", "-p", help="Download the dataset to a specific output path"),
+    file: str = typer.Option(None, "--file", "-f", help="Download a specific file from the dataset"),
     version: int = typer.Option(None, "--version", "-v", help="Dataset version"),
-    assets: bool = typer.Option(False, "--assets", "-a", help="Download assets"),
+    assets: bool = typer.Option(False, "--assets", "-a", help="Download STAC assets from the dataset"),
     force: bool = typer.Option(
         False, "--force", "-f", help="Force download even if file exists"
     ),
-    verbose: bool = typer.Option(False, "--verbose", help="Verbose output"),
+    verbose: bool = typer.Option(False, "--verbose", help="Verbose output. This will print the progress of the download"),
 ):
+    """
+    Download a dataset from the EOTDL.
+    \n\n
+    If using --path, it will download the dataset to the specified path. If no path is provided, it will download to ~/.eotdl/datasets.\n
+    If using --file, it will download the specified file. If no file is provided, it will download the entire dataset.\n
+    If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
+    If using --assets when the dataset is STAC, it will also download the STAC assets of the dataset. If not provided, it will only download the STAC metadata.\n
+    If using --force, it will download the dataset even if the file already exists.\n
+    If using --verbose, it will print the progress of the download.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl dataset get YourDataset\n
+    $ eotdl dataset get YourDataset --path /path/to/download --file dataset.zip --version 1 --assets True --force True --verbose True
+    """
     try:
         dst_path = download_dataset(
-            dataset, version, path, file, typer.echo, assets, force, verbose
+            dataset, version, path, typer.echo, assets, force, verbose
         )
         typer.echo(f"Data available at {dst_path}")
     except Exception as e:

eotdl/commands/models.py ADDED Viewed

@@ -0,0 +1,108 @@
+import typer
+from pathlib import Path
+from ..models import (
+    retrieve_models,
+    ingest_model,
+    download_model,
+)
+app = typer.Typer(help="EOTDL CLI models module.")
+@app.command()
+def list(
+    name: str = typer.Option(None, "--name", "-n", help="Filter the returned models by name"),
+    limit: int = typer.Option(None, "--limit", "-l", help="Limit the number of returned results"),
+):
+    """
+    Retrieve a list with all the models in the EOTDL.
+    If using --name, it will filter the results by name. If no name is provided, it will return all the models.\n
+    If using --limit, it will limit the number of results. If no limit is provided, it will return all the models.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl models list\n
+    $ eotdl models list --name YourModel --limit 5
+    """
+    try:
+        models = retrieve_models(name, limit)
+        typer.echo(models)
+    except Exception as e:
+        typer.echo(e)
+@app.command()
+def ingest(
+    path: Path = typer.Option(..., "--path", "-p", help="Path to the model to ingest"),
+    verbose: bool = typer.Option(False, "--verbose", help="Verbose output. This will print the progress of the ingestion"),
+):
+    """
+    Ingest a model to the EOTDL.
+    This command ingests the model to the EOTDL. The model must be a folder with the model files,
+    and at least a metadata.yml file or a catalog.json file. If there are not these files, the ingestion
+    will not work. All the files in the folder will be uploaded to the EOTDL.
+    \n\n
+    The following constraints apply to the model name:\n
+    - It must be unique\n
+    - It must be between 3 and 45 characters long\n
+    - It can only contain alphanumeric characters and dashes.\n
+    \n
+    The metadata.yml file must contain the following fields:\n
+    - name: the name of the model\n
+    - authors: the author or authors of the model\n
+    - license: the license of the model\n
+    - source: the source of the model\n
+    \n
+    If using --verbose, it will print the progress of the ingestion.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl models ingest --path /path/to/folder-with-model --verbose True
+    """
+    try:
+        ingest_model(path, verbose, typer.echo)
+    except Exception as e:
+        typer.echo(e)
+@app.command()
+def get(
+    model: str = typer.Argument(None, help="Name of the model to download"),
+    path: str = typer.Option(None, "--path", "-p", help="Download the model to a specific output path"),
+    file: str = typer.Option(None, "--file", "-f", help="Download a specific file from the model"),
+    version: int = typer.Option(None, "--version", "-v", help="Model version"),
+    assets: bool = typer.Option(False, "--assets", "-a", help="Download STAC assets from the model"),
+    force: bool = typer.Option(
+        False, "--force", "-f", help="Force download even if file exists"
+    ),
+    verbose: bool = typer.Option(False, "--verbose", help="Verbose output. This will print the progress of the download"),
+):
+    """
+    Download a model from the EOTDL.
+    \n\n
+    If using --path, it will download the model to the specified path. If no path is provided, it will download to ~/.eotdl/models.\n
+    If using --file, it will download the specified file. If no file is provided, it will download the entire model.\n
+    If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
+    If using --assets when the model is STAC, it will also download the STAC assets of the model. If not provided, it will only download the STAC metadata.\n
+    If using --force, it will download the model even if the file already exists.\n
+    If using --verbose, it will print the progress of the download.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl models get YourModel\n
+    $ eotdl models get YourModel --path /path/to/download --file model.zip --version 1 --assets True --force True --verbose True
+    """
+    try:
+        dst_path = download_model(
+            model, version, path, typer.echo, assets, force, verbose
+        )
+        typer.echo(f"Data available at {dst_path}")
+    except Exception as e:
+        typer.echo(e)
+if __name__ == "__main__":
+    app()

eotdl/curation/__init__.py CHANGED Viewed

@@ -1,7 +1,4 @@
 from .stac.dataframe import STACDataFrame  # , read_stac
 from .stac.stac import STACGenerator
-from .folder_formatters import SHFolderFormatter
-from .stac.utils import format_time_acquired, merge_stac_catalogs
 from .stac.parsers import STACIdParser, StructuredParser, UnestructuredParser
-from .stac.dataframe_labeling import UnlabeledStrategy, LabeledStrategy
+from .stac.dataframe_labeling import UnlabeledStrategy, LabeledStrategy

eotdl/curation/stac/assets.py CHANGED Viewed

@@ -4,13 +4,14 @@ Module for STAC Asset Generators
 from os import remove, listdir
 from os.path import dirname, join, basename, abspath
-from .utils.metadata import remove_raster_metadata
+from ...tools.metadata import remove_raster_metadata
 from typing import List
 import pandas as pd
 import rasterio
 import pystac
 MEDIA_TYPES_DICT = {
     'tif': pystac.MediaType.GEOTIFF,
     'tiff': pystac.MediaType.GEOTIFF,

eotdl/curation/stac/dataframe.py CHANGED Viewed

@@ -12,7 +12,7 @@ from os import makedirs
 from typing import Union, Optional
 from math import isnan
-from .utils import convert_df_geom_to_shape, get_all_children
+from ...tools import convert_df_geom_to_shape, get_all_children
 from pathlib import Path

eotdl/curation/stac/extensions/label/image_name_labeler.py CHANGED Viewed

@@ -145,11 +145,12 @@ class ImageNameLabeler(LabelExtensionObject):
             # TODO depending on the tasks, there must be extra fields
             # https://github.com/stac-extensions/label#assets
-            tasks = item.properties['label:tasks']
-            if 'tile_regression' in tasks:
-                pass
-            elif any(task in tasks for task in ('tile_classification', 'object_detection', 'segmentation')):
-                pass
+            if 'label:tasks' in item.properties:
+                tasks = item.properties['label:tasks']
+                if 'tile_regression' in tasks:
+                    pass
+                elif any(task in tasks for task in ('tile_classification', 'object_detection', 'segmentation')):
+                    pass
             label_ext = LabelExtension.ext(item)
             label_ext.add_geojson_labels(href=geojson_path,

eotdl/curation/stac/extensions/ml_dataset.py CHANGED Viewed

@@ -15,7 +15,7 @@ from os.path import dirname, exists
 from pystac.cache import ResolvedObjectCache
 from pystac.extensions.hooks import ExtensionHooks
 from typing import Any, Dict, List, Optional, Generic, TypeVar, Union, Set
-from ..utils import make_links_relative_to_path
+from ....tools import make_links_relative_to_path
 T = TypeVar("T", pystac.Item, pystac.Collection, pystac.Catalog)
@@ -273,16 +273,16 @@ class MLDatasetQualityMetrics:
     @classmethod
     def calculate(self, catalog: Union[pystac.Catalog, str]) -> None:
         """ """
         if isinstance(catalog, str):
             catalog = MLDatasetExtension(pystac.read_file(catalog))
+        elif isinstance(catalog, pystac.Catalog):
+            catalog = MLDatasetExtension(catalog)
         # Check the catalog has the extension
         if not MLDatasetExtension.has_extension(catalog):
             raise pystac.ExtensionNotImplemented(
                 f"MLDatasetExtension does not apply to type '{type(catalog).__name__}'"
             )
-        catalog.make_all_asset_hrefs_absolute()
         try:
             catalog.add_metric(self._search_spatial_duplicates(catalog))
             catalog.add_metric(self._get_classes_balance(catalog))
@@ -300,7 +300,8 @@ class MLDatasetQualityMetrics:
             rmtree(
                 destination
             )  # Remove the old catalog and replace it with the new one
-            catalog.save(dest_href=destination)
+            catalog.set_root(catalog)
+            catalog.normalize_and_save(root_href=destination)
             print("Success!")
         except STACValidationError as error:
             # Return full callback
@@ -309,11 +310,10 @@ class MLDatasetQualityMetrics:
     @staticmethod
     def _search_spatial_duplicates(catalog: pystac.Catalog):
         """ """
-        print("Looking for spatial duplicates...")
         items = list(
             set(
                 [item
-                 for item in tqdm(catalog.get_items(recursive=True))
+                 for item in tqdm(catalog.get_items(recursive=True), desc="Looking for spatial duplicates...")
                  if not LabelExtension.has_extension(item)
                  ]
                 )
@@ -374,7 +374,6 @@ class MLDatasetQualityMetrics:
             properties = dict()
             for label in labels:
                 asset_path = label.assets["labels"].href
-                print(asset_path)
                 # Open the linked geoJSON to obtain the label properties
                 try:
                     with open(asset_path) as f:
@@ -439,12 +438,6 @@ def add_ml_extension(
 ) -> None:
     """
     Adds the ML Dataset extension to a STAC catalog.
-    Args:
-        catalog : The STAC catalog to add the extension to.
-        destination : The destination path to save the catalog to.
-        splits : The splits to make.
-        split_proportions : The proportions of the splits.
     """
     if not isinstance(catalog, pystac.Catalog) and isinstance(catalog, str):
         catalog = pystac.read_file(catalog)
@@ -456,7 +449,10 @@ def add_ml_extension(
         )
     catalog_ml_dataset = MLDatasetExtension.ext(catalog, add_if_missing=True)
-    catalog_ml_dataset.set_self_href(destination + "/catalog.json")
+    if destination:
+        catalog_ml_dataset.set_self_href(destination + "/catalog.json")
+    else:
+        destination = dirname(catalog.get_self_href())
     catalog_ml_dataset.set_root(catalog_ml_dataset)
     # Set extension properties
@@ -470,6 +466,10 @@ def add_ml_extension(
         splits_collection = catalog.get_child(
             splits_collection_id
         )  # Get the collection to split
+        if not splits_collection:
+            raise AttributeError(
+                f"The catalog does not have a collection with the id {splits_collection_id}"
+            )
         make_splits(
             splits_collection,
             train_size=train_size,
@@ -480,10 +480,7 @@ def add_ml_extension(
     # Normalize the ref on the same folder
     if destination:
-        make_links_relative_to_path(destination, catalog_ml_dataset)
-        # TODO not working
-    else:
-        destination = dirname(catalog.get_self_href())
+        catalog_ml_dataset = make_links_relative_to_path(destination, catalog_ml_dataset)
     try:
         print("Validating and saving...")
@@ -507,13 +504,6 @@ def make_splits(
 ) -> None:
     """
     Makes the splits of the labels collection.
-    Args:
-        labels_collection : The STAC Collection make the splits on.
-        train_size : The percentage of the dataset to use for training.
-        test_size : The percentage of the dataset to use for testing.
-        val_size : The percentage of the dataset to use for validation.
-        verbose : Whether to print the sizes of the splits.
     """
     if isinstance(labels_collection, str):
         labels_collection = pystac.read_file(labels_collection)

eotdl/curation/stac/extent.py CHANGED Viewed

@@ -11,7 +11,7 @@ from glob import glob
 from os.path import dirname
 from typing import List
-from .utils import get_item_metadata
+from ...tools import get_item_metadata
 def get_dem_temporal_interval() -> pystac.TemporalExtent:

eotdl/curation/stac/stac.py CHANGED Viewed

@@ -24,7 +24,7 @@ from typing import Union, Optional
 from .parsers import STACIdParser, StructuredParser
 from .assets import STACAssetGenerator
 from .dataframe_labeling import LabelingStrategy, UnlabeledStrategy
-from .utils import (format_time_acquired,
+from ...tools import (format_time_acquired,
                     cut_images,
                     get_item_metadata,
                     get_all_images_in_path)

eotdl/datasets/download.py CHANGED Viewed

@@ -4,7 +4,7 @@ from tqdm import tqdm
 from ..auth import with_auth
 from .retrieve import retrieve_dataset, retrieve_dataset_files
-from .utils import calculate_checksum
+from ..shared import calculate_checksum
 from ..repos import FilesAPIRepo
@@ -13,12 +13,12 @@ def download_dataset(
     dataset_name,
     version=None,
     path=None,
-    file=None,
     logger=None,
     assets=False,
     force=False,
     verbose=False,
     user=None,
+    file=None,
 ):
     dataset = retrieve_dataset(dataset_name)
     if version is None:
@@ -61,7 +61,7 @@ def download_dataset(
             # return Outputs(dst_path=dst_path)
         dataset_files = retrieve_dataset_files(dataset["id"], version)
         repo = FilesAPIRepo()
-        for file in tqdm(dataset_files, disable=verbose, unit="file"):
+        for file in tqdm(dataset_files, disable=verbose, unit="file", position=0):
             filename, file_version = file["filename"], file["version"]
             if verbose:
                 logger(f"Downloading {file['filename']}...")
@@ -71,12 +71,13 @@ def download_dataset(
                 user["id_token"],
                 download_path,
                 file_version,
+                progress=True,
             )
             # if calculate_checksum(dst_path) != checksum:
             #     logger(f"Checksum for {file} does not match")
             if verbose:
                 logger(f"Done")
-        return "/".join(dst_path.split("/")[:-1])
+        return download_path
     else:
         raise NotImplementedError("Downloading a STAC dataset is not implemented")
     #     logger("Downloading STAC metadata...")

eotdl 2023.10.25.post10__py3-none-any.whl → 2023.11.2.post2__py3-none-any.whl

eotdl 2023.10.25.post10__py3-none-any.whl → 2023.11.2.post2__py3-none-any.whl