eotdl 2023.10.25.post10__py3-none-any.whl → 2023.11.2.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/cli.py +6 -2
- eotdl/commands/auth.py +18 -1
- eotdl/commands/datasets.py +61 -11
- eotdl/commands/models.py +108 -0
- eotdl/curation/__init__.py +1 -4
- eotdl/curation/stac/assets.py +2 -1
- eotdl/curation/stac/dataframe.py +1 -1
- eotdl/curation/stac/extensions/label/image_name_labeler.py +6 -5
- eotdl/curation/stac/extensions/ml_dataset.py +15 -25
- eotdl/curation/stac/extent.py +1 -1
- eotdl/curation/stac/stac.py +1 -1
- eotdl/datasets/download.py +5 -4
- eotdl/datasets/ingest.py +25 -154
- eotdl/datasets/retrieve.py +1 -1
- eotdl/files/__init__.py +1 -0
- eotdl/files/ingest.py +175 -0
- eotdl/models/__init__.py +3 -0
- eotdl/models/download.py +119 -0
- eotdl/models/ingest.py +47 -0
- eotdl/models/metadata.py +16 -0
- eotdl/models/retrieve.py +26 -0
- eotdl/repos/FilesAPIRepo.py +136 -95
- eotdl/repos/ModelsAPIRepo.py +40 -0
- eotdl/repos/__init__.py +1 -0
- eotdl/shared/__init__.py +1 -0
- eotdl/tools/__init__.py +5 -6
- eotdl/tools/geo_utils.py +15 -1
- eotdl/tools/stac.py +144 -8
- eotdl/tools/time_utils.py +19 -6
- eotdl/tools/tools.py +2 -3
- {eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/METADATA +1 -1
- {eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/RECORD +38 -35
- eotdl/curation/folder_formatters/__init__.py +0 -1
- eotdl/curation/folder_formatters/base.py +0 -19
- eotdl/curation/folder_formatters/sentinel_hub.py +0 -135
- eotdl/curation/stac/utils/__init__.py +0 -5
- eotdl/curation/stac/utils/geometry.py +0 -22
- eotdl/curation/stac/utils/stac.py +0 -143
- eotdl/curation/stac/utils/time.py +0 -21
- /eotdl/{datasets/utils.py → shared/checksum.py} +0 -0
- /eotdl/{curation/stac/utils → tools}/metadata.py +0 -0
- /eotdl/{curation/stac/utils → tools}/paths.py +0 -0
- {eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/WHEEL +0 -0
- {eotdl-2023.10.25.post10.dist-info → eotdl-2023.11.2.post2.dist-info}/entry_points.txt +0 -0
eotdl/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2023.
|
1
|
+
__version__ = "2023.11.02-2"
|
eotdl/cli.py
CHANGED
@@ -1,15 +1,19 @@
|
|
1
1
|
import typer
|
2
|
-
from .commands import auth, datasets
|
2
|
+
from .commands import auth, datasets, models
|
3
3
|
from . import __version__
|
4
4
|
|
5
|
-
app = typer.Typer()
|
5
|
+
app = typer.Typer(help='EOTDL command line interface.')
|
6
6
|
|
7
7
|
app.add_typer(auth.app, name="auth")
|
8
8
|
app.add_typer(datasets.app, name="datasets")
|
9
|
+
app.add_typer(models.app, name="models")
|
9
10
|
|
10
11
|
|
11
12
|
@app.command()
|
12
13
|
def version():
|
14
|
+
"""
|
15
|
+
Get EOTDL version.
|
16
|
+
"""
|
13
17
|
typer.echo(f"EOTDL Version: {__version__}")
|
14
18
|
|
15
19
|
|
eotdl/commands/auth.py
CHANGED
@@ -2,11 +2,23 @@ import typer
|
|
2
2
|
from ..auth import is_logged, auth, logout_user
|
3
3
|
from ..auth.errors import LoginError
|
4
4
|
|
5
|
-
app = typer.Typer()
|
5
|
+
app = typer.Typer(help="EOTDL CLI authentication module.")
|
6
6
|
|
7
7
|
|
8
8
|
@app.command()
|
9
9
|
def login():
|
10
|
+
"""
|
11
|
+
Login to the EOTDL.
|
12
|
+
|
13
|
+
This command will return a URL that you can visit to authenticate.
|
14
|
+
After authentication, your credentials will be stored locally.
|
15
|
+
This enables future commands to be executed without having to authenticate again
|
16
|
+
(at least while the credentials are valid).
|
17
|
+
\n\n
|
18
|
+
The default browser will be opened automatically. If not, you can copy the URL and paste it in your browser.
|
19
|
+
\n\n
|
20
|
+
By default, the credentials will be stored in the following file: ~/.eotdl/credentials.json
|
21
|
+
"""
|
10
22
|
try:
|
11
23
|
user = auth()
|
12
24
|
typer.echo(f"You are logged in as {user['email']}")
|
@@ -17,6 +29,11 @@ def login():
|
|
17
29
|
|
18
30
|
@app.command()
|
19
31
|
def logout():
|
32
|
+
"""
|
33
|
+
Logout from the EOTDL.
|
34
|
+
|
35
|
+
You will receive a logout url that you can visit in case you want to authenticate with a different account.
|
36
|
+
"""
|
20
37
|
user = is_logged()
|
21
38
|
if user:
|
22
39
|
typer.echo(f"You are logged in as {user['email']}")
|
eotdl/commands/datasets.py
CHANGED
@@ -7,14 +7,25 @@ from ..datasets import (
|
|
7
7
|
download_dataset,
|
8
8
|
)
|
9
9
|
|
10
|
-
app = typer.Typer()
|
10
|
+
app = typer.Typer(help="EOTDL CLI datasets module.")
|
11
11
|
|
12
12
|
|
13
13
|
@app.command()
|
14
14
|
def list(
|
15
|
-
name: str = typer.Option(None, "--name", "-n", help="Filter by name"),
|
16
|
-
limit: int = typer.Option(None, "--limit", "-l", help="Limit number of results"),
|
15
|
+
name: str = typer.Option(None, "--name", "-n", help="Filter the returned datasets by name"),
|
16
|
+
limit: int = typer.Option(None, "--limit", "-l", help="Limit the number of returned results"),
|
17
17
|
):
|
18
|
+
"""
|
19
|
+
Retrieve a list with all the datasets in the EOTDL.
|
20
|
+
|
21
|
+
If using --name, it will filter the results by name. If no name is provided, it will return all the datasets.\n
|
22
|
+
If using --limit, it will limit the number of results. If no limit is provided, it will return all the datasets.
|
23
|
+
\n\n
|
24
|
+
Examples\n
|
25
|
+
--------\n
|
26
|
+
$ eotdl datasets list\n
|
27
|
+
$ eotdl datasets list --name YourModel --limit 5
|
28
|
+
"""
|
18
29
|
try:
|
19
30
|
datasets = retrieve_datasets(name, limit)
|
20
31
|
typer.echo(datasets)
|
@@ -24,9 +35,33 @@ def list(
|
|
24
35
|
|
25
36
|
@app.command()
|
26
37
|
def ingest(
|
27
|
-
path: Path = typer.Option(..., "--path", "-p", help="Path to dataset"),
|
28
|
-
verbose: bool = typer.Option(False, "--verbose", help="Verbose output"),
|
38
|
+
path: Path = typer.Option(..., "--path", "-p", help="Path to the dataset to ingest"),
|
39
|
+
verbose: bool = typer.Option(False, "--verbose", help="Verbose output. This will print the progress of the ingestion"),
|
29
40
|
):
|
41
|
+
"""
|
42
|
+
Ingest a dataset to the EOTDL.
|
43
|
+
|
44
|
+
This command ingests the dataset to the EOTDL. The dataset must be a folder with the dataset files,
|
45
|
+
and at least a metadata.yml file or a catalog.json file. If there are not these files, the ingestion
|
46
|
+
will not work. All the files in the folder will be uploaded to the EOTDL.
|
47
|
+
\n\n
|
48
|
+
The following constraints apply to the dataset name:\n
|
49
|
+
- It must be unique\n
|
50
|
+
- It must be between 3 and 45 characters long\n
|
51
|
+
- It can only contain alphanumeric characters and dashes.\n
|
52
|
+
\n
|
53
|
+
The metadata.yml file must contain the following fields:\n
|
54
|
+
- name: the name of the dataset\n
|
55
|
+
- authors: the author or authors of the dataset\n
|
56
|
+
- license: the license of the dataset\n
|
57
|
+
- source: the source of the dataset\n
|
58
|
+
\n
|
59
|
+
If using --verbose, it will print the progress of the ingestion.
|
60
|
+
\n\n
|
61
|
+
Examples\n
|
62
|
+
--------\n
|
63
|
+
$ eotdl dataset ingest --path /path/to/folder-with-dataset --verbose True
|
64
|
+
"""
|
30
65
|
try:
|
31
66
|
ingest_dataset(path, verbose, typer.echo)
|
32
67
|
except Exception as e:
|
@@ -35,19 +70,34 @@ def ingest(
|
|
35
70
|
|
36
71
|
@app.command()
|
37
72
|
def get(
|
38
|
-
dataset: str,
|
39
|
-
path: str = typer.Option(None, "--path", "-p", help="Download to a specific path"),
|
40
|
-
file: str = typer.Option(None, "--file", "-f", help="Download a specific file"),
|
73
|
+
dataset: str = typer.Argument(None, help="Name of the dataset to download"),
|
74
|
+
path: str = typer.Option(None, "--path", "-p", help="Download the dataset to a specific output path"),
|
75
|
+
file: str = typer.Option(None, "--file", "-f", help="Download a specific file from the dataset"),
|
41
76
|
version: int = typer.Option(None, "--version", "-v", help="Dataset version"),
|
42
|
-
assets: bool = typer.Option(False, "--assets", "-a", help="Download assets"),
|
77
|
+
assets: bool = typer.Option(False, "--assets", "-a", help="Download STAC assets from the dataset"),
|
43
78
|
force: bool = typer.Option(
|
44
79
|
False, "--force", "-f", help="Force download even if file exists"
|
45
80
|
),
|
46
|
-
verbose: bool = typer.Option(False, "--verbose", help="Verbose output"),
|
81
|
+
verbose: bool = typer.Option(False, "--verbose", help="Verbose output. This will print the progress of the download"),
|
47
82
|
):
|
83
|
+
"""
|
84
|
+
Download a dataset from the EOTDL.
|
85
|
+
\n\n
|
86
|
+
If using --path, it will download the dataset to the specified path. If no path is provided, it will download to ~/.eotdl/datasets.\n
|
87
|
+
If using --file, it will download the specified file. If no file is provided, it will download the entire dataset.\n
|
88
|
+
If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
|
89
|
+
If using --assets when the dataset is STAC, it will also download the STAC assets of the dataset. If not provided, it will only download the STAC metadata.\n
|
90
|
+
If using --force, it will download the dataset even if the file already exists.\n
|
91
|
+
If using --verbose, it will print the progress of the download.
|
92
|
+
\n\n
|
93
|
+
Examples\n
|
94
|
+
--------\n
|
95
|
+
$ eotdl dataset get YourDataset\n
|
96
|
+
$ eotdl dataset get YourDataset --path /path/to/download --file dataset.zip --version 1 --assets True --force True --verbose True
|
97
|
+
"""
|
48
98
|
try:
|
49
99
|
dst_path = download_dataset(
|
50
|
-
dataset, version, path,
|
100
|
+
dataset, version, path, typer.echo, assets, force, verbose
|
51
101
|
)
|
52
102
|
typer.echo(f"Data available at {dst_path}")
|
53
103
|
except Exception as e:
|
eotdl/commands/models.py
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
import typer
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
from ..models import (
|
5
|
+
retrieve_models,
|
6
|
+
ingest_model,
|
7
|
+
download_model,
|
8
|
+
)
|
9
|
+
|
10
|
+
app = typer.Typer(help="EOTDL CLI models module.")
|
11
|
+
|
12
|
+
|
13
|
+
@app.command()
|
14
|
+
def list(
|
15
|
+
name: str = typer.Option(None, "--name", "-n", help="Filter the returned models by name"),
|
16
|
+
limit: int = typer.Option(None, "--limit", "-l", help="Limit the number of returned results"),
|
17
|
+
):
|
18
|
+
"""
|
19
|
+
Retrieve a list with all the models in the EOTDL.
|
20
|
+
|
21
|
+
If using --name, it will filter the results by name. If no name is provided, it will return all the models.\n
|
22
|
+
If using --limit, it will limit the number of results. If no limit is provided, it will return all the models.
|
23
|
+
\n\n
|
24
|
+
Examples\n
|
25
|
+
--------\n
|
26
|
+
$ eotdl models list\n
|
27
|
+
$ eotdl models list --name YourModel --limit 5
|
28
|
+
"""
|
29
|
+
try:
|
30
|
+
models = retrieve_models(name, limit)
|
31
|
+
typer.echo(models)
|
32
|
+
except Exception as e:
|
33
|
+
typer.echo(e)
|
34
|
+
|
35
|
+
|
36
|
+
@app.command()
|
37
|
+
def ingest(
|
38
|
+
path: Path = typer.Option(..., "--path", "-p", help="Path to the model to ingest"),
|
39
|
+
verbose: bool = typer.Option(False, "--verbose", help="Verbose output. This will print the progress of the ingestion"),
|
40
|
+
):
|
41
|
+
"""
|
42
|
+
Ingest a model to the EOTDL.
|
43
|
+
|
44
|
+
This command ingests the model to the EOTDL. The model must be a folder with the model files,
|
45
|
+
and at least a metadata.yml file or a catalog.json file. If there are not these files, the ingestion
|
46
|
+
will not work. All the files in the folder will be uploaded to the EOTDL.
|
47
|
+
\n\n
|
48
|
+
The following constraints apply to the model name:\n
|
49
|
+
- It must be unique\n
|
50
|
+
- It must be between 3 and 45 characters long\n
|
51
|
+
- It can only contain alphanumeric characters and dashes.\n
|
52
|
+
\n
|
53
|
+
The metadata.yml file must contain the following fields:\n
|
54
|
+
- name: the name of the model\n
|
55
|
+
- authors: the author or authors of the model\n
|
56
|
+
- license: the license of the model\n
|
57
|
+
- source: the source of the model\n
|
58
|
+
\n
|
59
|
+
If using --verbose, it will print the progress of the ingestion.
|
60
|
+
\n\n
|
61
|
+
Examples\n
|
62
|
+
--------\n
|
63
|
+
$ eotdl models ingest --path /path/to/folder-with-model --verbose True
|
64
|
+
"""
|
65
|
+
try:
|
66
|
+
ingest_model(path, verbose, typer.echo)
|
67
|
+
except Exception as e:
|
68
|
+
typer.echo(e)
|
69
|
+
|
70
|
+
|
71
|
+
@app.command()
|
72
|
+
def get(
|
73
|
+
model: str = typer.Argument(None, help="Name of the model to download"),
|
74
|
+
path: str = typer.Option(None, "--path", "-p", help="Download the model to a specific output path"),
|
75
|
+
file: str = typer.Option(None, "--file", "-f", help="Download a specific file from the model"),
|
76
|
+
version: int = typer.Option(None, "--version", "-v", help="Model version"),
|
77
|
+
assets: bool = typer.Option(False, "--assets", "-a", help="Download STAC assets from the model"),
|
78
|
+
force: bool = typer.Option(
|
79
|
+
False, "--force", "-f", help="Force download even if file exists"
|
80
|
+
),
|
81
|
+
verbose: bool = typer.Option(False, "--verbose", help="Verbose output. This will print the progress of the download"),
|
82
|
+
):
|
83
|
+
"""
|
84
|
+
Download a model from the EOTDL.
|
85
|
+
\n\n
|
86
|
+
If using --path, it will download the model to the specified path. If no path is provided, it will download to ~/.eotdl/models.\n
|
87
|
+
If using --file, it will download the specified file. If no file is provided, it will download the entire model.\n
|
88
|
+
If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
|
89
|
+
If using --assets when the model is STAC, it will also download the STAC assets of the model. If not provided, it will only download the STAC metadata.\n
|
90
|
+
If using --force, it will download the model even if the file already exists.\n
|
91
|
+
If using --verbose, it will print the progress of the download.
|
92
|
+
\n\n
|
93
|
+
Examples\n
|
94
|
+
--------\n
|
95
|
+
$ eotdl models get YourModel\n
|
96
|
+
$ eotdl models get YourModel --path /path/to/download --file model.zip --version 1 --assets True --force True --verbose True
|
97
|
+
"""
|
98
|
+
try:
|
99
|
+
dst_path = download_model(
|
100
|
+
model, version, path, typer.echo, assets, force, verbose
|
101
|
+
)
|
102
|
+
typer.echo(f"Data available at {dst_path}")
|
103
|
+
except Exception as e:
|
104
|
+
typer.echo(e)
|
105
|
+
|
106
|
+
|
107
|
+
if __name__ == "__main__":
|
108
|
+
app()
|
eotdl/curation/__init__.py
CHANGED
@@ -1,7 +1,4 @@
|
|
1
1
|
from .stac.dataframe import STACDataFrame # , read_stac
|
2
|
-
|
3
2
|
from .stac.stac import STACGenerator
|
4
|
-
from .folder_formatters import SHFolderFormatter
|
5
|
-
from .stac.utils import format_time_acquired, merge_stac_catalogs
|
6
3
|
from .stac.parsers import STACIdParser, StructuredParser, UnestructuredParser
|
7
|
-
from .stac.dataframe_labeling import UnlabeledStrategy, LabeledStrategy
|
4
|
+
from .stac.dataframe_labeling import UnlabeledStrategy, LabeledStrategy
|
eotdl/curation/stac/assets.py
CHANGED
@@ -4,13 +4,14 @@ Module for STAC Asset Generators
|
|
4
4
|
|
5
5
|
from os import remove, listdir
|
6
6
|
from os.path import dirname, join, basename, abspath
|
7
|
-
from .
|
7
|
+
from ...tools.metadata import remove_raster_metadata
|
8
8
|
from typing import List
|
9
9
|
|
10
10
|
import pandas as pd
|
11
11
|
import rasterio
|
12
12
|
import pystac
|
13
13
|
|
14
|
+
|
14
15
|
MEDIA_TYPES_DICT = {
|
15
16
|
'tif': pystac.MediaType.GEOTIFF,
|
16
17
|
'tiff': pystac.MediaType.GEOTIFF,
|
eotdl/curation/stac/dataframe.py
CHANGED
@@ -145,11 +145,12 @@ class ImageNameLabeler(LabelExtensionObject):
|
|
145
145
|
|
146
146
|
# TODO depending on the tasks, there must be extra fields
|
147
147
|
# https://github.com/stac-extensions/label#assets
|
148
|
-
tasks
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
148
|
+
if 'label:tasks' in item.properties:
|
149
|
+
tasks = item.properties['label:tasks']
|
150
|
+
if 'tile_regression' in tasks:
|
151
|
+
pass
|
152
|
+
elif any(task in tasks for task in ('tile_classification', 'object_detection', 'segmentation')):
|
153
|
+
pass
|
153
154
|
|
154
155
|
label_ext = LabelExtension.ext(item)
|
155
156
|
label_ext.add_geojson_labels(href=geojson_path,
|
@@ -15,7 +15,7 @@ from os.path import dirname, exists
|
|
15
15
|
from pystac.cache import ResolvedObjectCache
|
16
16
|
from pystac.extensions.hooks import ExtensionHooks
|
17
17
|
from typing import Any, Dict, List, Optional, Generic, TypeVar, Union, Set
|
18
|
-
from
|
18
|
+
from ....tools import make_links_relative_to_path
|
19
19
|
|
20
20
|
T = TypeVar("T", pystac.Item, pystac.Collection, pystac.Catalog)
|
21
21
|
|
@@ -273,16 +273,16 @@ class MLDatasetQualityMetrics:
|
|
273
273
|
@classmethod
|
274
274
|
def calculate(self, catalog: Union[pystac.Catalog, str]) -> None:
|
275
275
|
""" """
|
276
|
-
|
277
276
|
if isinstance(catalog, str):
|
278
277
|
catalog = MLDatasetExtension(pystac.read_file(catalog))
|
278
|
+
elif isinstance(catalog, pystac.Catalog):
|
279
|
+
catalog = MLDatasetExtension(catalog)
|
279
280
|
# Check the catalog has the extension
|
280
281
|
if not MLDatasetExtension.has_extension(catalog):
|
281
282
|
raise pystac.ExtensionNotImplemented(
|
282
283
|
f"MLDatasetExtension does not apply to type '{type(catalog).__name__}'"
|
283
284
|
)
|
284
285
|
|
285
|
-
catalog.make_all_asset_hrefs_absolute()
|
286
286
|
try:
|
287
287
|
catalog.add_metric(self._search_spatial_duplicates(catalog))
|
288
288
|
catalog.add_metric(self._get_classes_balance(catalog))
|
@@ -300,7 +300,8 @@ class MLDatasetQualityMetrics:
|
|
300
300
|
rmtree(
|
301
301
|
destination
|
302
302
|
) # Remove the old catalog and replace it with the new one
|
303
|
-
catalog.
|
303
|
+
catalog.set_root(catalog)
|
304
|
+
catalog.normalize_and_save(root_href=destination)
|
304
305
|
print("Success!")
|
305
306
|
except STACValidationError as error:
|
306
307
|
# Return full callback
|
@@ -309,11 +310,10 @@ class MLDatasetQualityMetrics:
|
|
309
310
|
@staticmethod
|
310
311
|
def _search_spatial_duplicates(catalog: pystac.Catalog):
|
311
312
|
""" """
|
312
|
-
print("Looking for spatial duplicates...")
|
313
313
|
items = list(
|
314
314
|
set(
|
315
315
|
[item
|
316
|
-
for item in tqdm(catalog.get_items(recursive=True))
|
316
|
+
for item in tqdm(catalog.get_items(recursive=True), desc="Looking for spatial duplicates...")
|
317
317
|
if not LabelExtension.has_extension(item)
|
318
318
|
]
|
319
319
|
)
|
@@ -374,7 +374,6 @@ class MLDatasetQualityMetrics:
|
|
374
374
|
properties = dict()
|
375
375
|
for label in labels:
|
376
376
|
asset_path = label.assets["labels"].href
|
377
|
-
print(asset_path)
|
378
377
|
# Open the linked geoJSON to obtain the label properties
|
379
378
|
try:
|
380
379
|
with open(asset_path) as f:
|
@@ -439,12 +438,6 @@ def add_ml_extension(
|
|
439
438
|
) -> None:
|
440
439
|
"""
|
441
440
|
Adds the ML Dataset extension to a STAC catalog.
|
442
|
-
|
443
|
-
Args:
|
444
|
-
catalog : The STAC catalog to add the extension to.
|
445
|
-
destination : The destination path to save the catalog to.
|
446
|
-
splits : The splits to make.
|
447
|
-
split_proportions : The proportions of the splits.
|
448
441
|
"""
|
449
442
|
if not isinstance(catalog, pystac.Catalog) and isinstance(catalog, str):
|
450
443
|
catalog = pystac.read_file(catalog)
|
@@ -456,7 +449,10 @@ def add_ml_extension(
|
|
456
449
|
)
|
457
450
|
|
458
451
|
catalog_ml_dataset = MLDatasetExtension.ext(catalog, add_if_missing=True)
|
459
|
-
|
452
|
+
if destination:
|
453
|
+
catalog_ml_dataset.set_self_href(destination + "/catalog.json")
|
454
|
+
else:
|
455
|
+
destination = dirname(catalog.get_self_href())
|
460
456
|
catalog_ml_dataset.set_root(catalog_ml_dataset)
|
461
457
|
|
462
458
|
# Set extension properties
|
@@ -470,6 +466,10 @@ def add_ml_extension(
|
|
470
466
|
splits_collection = catalog.get_child(
|
471
467
|
splits_collection_id
|
472
468
|
) # Get the collection to split
|
469
|
+
if not splits_collection:
|
470
|
+
raise AttributeError(
|
471
|
+
f"The catalog does not have a collection with the id {splits_collection_id}"
|
472
|
+
)
|
473
473
|
make_splits(
|
474
474
|
splits_collection,
|
475
475
|
train_size=train_size,
|
@@ -480,10 +480,7 @@ def add_ml_extension(
|
|
480
480
|
|
481
481
|
# Normalize the ref on the same folder
|
482
482
|
if destination:
|
483
|
-
make_links_relative_to_path(destination, catalog_ml_dataset)
|
484
|
-
# TODO not working
|
485
|
-
else:
|
486
|
-
destination = dirname(catalog.get_self_href())
|
483
|
+
catalog_ml_dataset = make_links_relative_to_path(destination, catalog_ml_dataset)
|
487
484
|
|
488
485
|
try:
|
489
486
|
print("Validating and saving...")
|
@@ -507,13 +504,6 @@ def make_splits(
|
|
507
504
|
) -> None:
|
508
505
|
"""
|
509
506
|
Makes the splits of the labels collection.
|
510
|
-
|
511
|
-
Args:
|
512
|
-
labels_collection : The STAC Collection make the splits on.
|
513
|
-
train_size : The percentage of the dataset to use for training.
|
514
|
-
test_size : The percentage of the dataset to use for testing.
|
515
|
-
val_size : The percentage of the dataset to use for validation.
|
516
|
-
verbose : Whether to print the sizes of the splits.
|
517
507
|
"""
|
518
508
|
if isinstance(labels_collection, str):
|
519
509
|
labels_collection = pystac.read_file(labels_collection)
|
eotdl/curation/stac/extent.py
CHANGED
eotdl/curation/stac/stac.py
CHANGED
@@ -24,7 +24,7 @@ from typing import Union, Optional
|
|
24
24
|
from .parsers import STACIdParser, StructuredParser
|
25
25
|
from .assets import STACAssetGenerator
|
26
26
|
from .dataframe_labeling import LabelingStrategy, UnlabeledStrategy
|
27
|
-
from
|
27
|
+
from ...tools import (format_time_acquired,
|
28
28
|
cut_images,
|
29
29
|
get_item_metadata,
|
30
30
|
get_all_images_in_path)
|
eotdl/datasets/download.py
CHANGED
@@ -4,7 +4,7 @@ from tqdm import tqdm
|
|
4
4
|
|
5
5
|
from ..auth import with_auth
|
6
6
|
from .retrieve import retrieve_dataset, retrieve_dataset_files
|
7
|
-
from
|
7
|
+
from ..shared import calculate_checksum
|
8
8
|
from ..repos import FilesAPIRepo
|
9
9
|
|
10
10
|
|
@@ -13,12 +13,12 @@ def download_dataset(
|
|
13
13
|
dataset_name,
|
14
14
|
version=None,
|
15
15
|
path=None,
|
16
|
-
file=None,
|
17
16
|
logger=None,
|
18
17
|
assets=False,
|
19
18
|
force=False,
|
20
19
|
verbose=False,
|
21
20
|
user=None,
|
21
|
+
file=None,
|
22
22
|
):
|
23
23
|
dataset = retrieve_dataset(dataset_name)
|
24
24
|
if version is None:
|
@@ -61,7 +61,7 @@ def download_dataset(
|
|
61
61
|
# return Outputs(dst_path=dst_path)
|
62
62
|
dataset_files = retrieve_dataset_files(dataset["id"], version)
|
63
63
|
repo = FilesAPIRepo()
|
64
|
-
for file in tqdm(dataset_files, disable=verbose, unit="file"):
|
64
|
+
for file in tqdm(dataset_files, disable=verbose, unit="file", position=0):
|
65
65
|
filename, file_version = file["filename"], file["version"]
|
66
66
|
if verbose:
|
67
67
|
logger(f"Downloading {file['filename']}...")
|
@@ -71,12 +71,13 @@ def download_dataset(
|
|
71
71
|
user["id_token"],
|
72
72
|
download_path,
|
73
73
|
file_version,
|
74
|
+
progress=True,
|
74
75
|
)
|
75
76
|
# if calculate_checksum(dst_path) != checksum:
|
76
77
|
# logger(f"Checksum for {file} does not match")
|
77
78
|
if verbose:
|
78
79
|
logger(f"Done")
|
79
|
-
return
|
80
|
+
return download_path
|
80
81
|
else:
|
81
82
|
raise NotImplementedError("Downloading a STAC dataset is not implemented")
|
82
83
|
# logger("Downloading STAC metadata...")
|