eotdl 2025.4.22.post2__py3-none-any.whl → 2025.5.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/cli.py +2 -1
- eotdl/commands/auth.py +5 -2
- eotdl/commands/datasets.py +16 -10
- eotdl/commands/models.py +8 -9
- eotdl/commands/pipelines.py +132 -0
- eotdl/datasets/__init__.py +1 -1
- eotdl/datasets/ingest.py +18 -7
- eotdl/datasets/retrieve.py +16 -2
- eotdl/datasets/stage.py +1 -1
- eotdl/datasets/update.py +6 -4
- eotdl/fe/__init__.py +4 -0
- eotdl/fe/ingest.py +49 -0
- eotdl/fe/openeo/__init__.py +2 -0
- eotdl/fe/openeo/advanced_patch_extraction.py +124 -0
- eotdl/fe/openeo/basic_point_extraction.py +86 -0
- eotdl/fe/openeo/dataframe_utils.py +230 -0
- eotdl/fe/openeo/s3proxy_utils.py +180 -0
- eotdl/fe/openeo/spatial_utils.py +28 -0
- eotdl/fe/openeo/temporal_utils.py +16 -0
- eotdl/fe/retrieve.py +18 -0
- eotdl/fe/stage.py +63 -0
- eotdl/fe/update.py +12 -0
- eotdl/files/__init__.py +1 -0
- eotdl/files/get_url.py +18 -0
- eotdl/files/ingest.py +4 -4
- eotdl/files/metadata.py +2 -1
- eotdl/models/ingest.py +3 -2
- eotdl/models/update.py +6 -4
- eotdl/repos/DatasetsAPIRepo.py +15 -3
- eotdl/repos/FEAPIRepo.py +50 -0
- eotdl/repos/FilesAPIRepo.py +0 -1
- eotdl/repos/ModelsAPIRepo.py +3 -2
- eotdl/repos/__init__.py +2 -1
- eotdl/tools/ais_labelling.py +273 -0
- {eotdl-2025.4.22.post2.dist-info → eotdl-2025.5.26.dist-info}/METADATA +1 -1
- {eotdl-2025.4.22.post2.dist-info → eotdl-2025.5.26.dist-info}/RECORD +39 -23
- {eotdl-2025.4.22.post2.dist-info → eotdl-2025.5.26.dist-info}/WHEEL +0 -0
- {eotdl-2025.4.22.post2.dist-info → eotdl-2025.5.26.dist-info}/entry_points.txt +0 -0
eotdl/__init__.py
CHANGED
@@ -1 +1 @@
-__version__ = "2025.
+__version__ = "2025.05.26"
eotdl/cli.py
CHANGED
@@ -1,7 +1,7 @@
 import typer
 import os
 
-from .commands import auth, datasets, models, stac
+from .commands import auth, datasets, models, stac, pipelines
 from .repos import APIRepo
 from . import __version__
 
@@ -11,6 +11,7 @@ app.add_typer(auth.app, name="auth")
 app.add_typer(datasets.app, name="datasets")
 app.add_typer(models.app, name="models")
 app.add_typer(stac.app, name="stac")
+app.add_typer(pipelines.app, name="pipelines")
 
 @app.command()
 def version():
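Registering `pipelines.app` as a Typer sub-app routes its commands under a `pipelines` namespace, alongside `auth`, `datasets`, `models` and `stac`. For example, the `list` command defined in the new `eotdl/commands/pipelines.py` below becomes:

    $ eotdl pipelines list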
eotdl/commands/auth.py
CHANGED
@@ -28,7 +28,9 @@ def login():
 
 
 @app.command()
-def logout():
+def logout(
+    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation")
+):
     """
     Logout from the EOTDL.
 
@@ -37,7 +39,8 @@ def logout():
     user = is_logged()
     if user:
         typer.echo(f"You are logged in as {user['email']}")
-
+        if not yes:
+            typer.confirm("Are you sure you want to logout?", abort=True)
         logout_url = logout_user()
         typer.echo("You are logged out.")
         typer.echo(
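The `--yes`/`-y` flag skips the interactive confirmation prompt, so logout can now run non-interactively, e.g. in scripts or CI:

    $ eotdl auth logout --yes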
eotdl/commands/datasets.py
CHANGED
@@ -6,6 +6,7 @@ from ..datasets import (
     ingest_dataset,
     stage_dataset,
     deactivate_dataset,
+    retrieve_private_datasets
 )
 
 app = typer.Typer(help="Explore, ingest and download training datasets.")
@@ -72,6 +73,9 @@ def list(
     limit: int = typer.Option(
         None, "--limit", "-l", help="Limit the number of returned results"
     ),
+    private: bool = typer.Option(
+        False, "--private", "-p", help="Show private datasets"
+    ),
 ):
     """
     Retrieve a list with all the datasets in the EOTDL.
@@ -85,7 +89,10 @@ def list(
     $ eotdl datasets list --name YourModel --limit 5
     """
     try:
-        datasets = retrieve_datasets(name, limit)
+        if private:
+            datasets = retrieve_private_datasets()
+        else:
+            datasets = retrieve_datasets(name, limit)
         typer.echo(datasets)
     except Exception as e:
         typer.echo(e)
@@ -96,9 +103,6 @@ def get(
     path: str = typer.Option(
         None, "--path", "-p", help="Download the dataset to a specific output path"
     ),
-    file: str = typer.Option(
-        None, "--file", "-f", help="Download a specific file from the dataset"
-    ),
     version: int = typer.Option(None, "--version", "-v", help="Dataset version"),
     assets: bool = typer.Option(
         False, "--assets", "-a", help="Download STAC assets from the dataset"
@@ -116,7 +120,6 @@ def get(
     Download a dataset from the EOTDL.
     \n\n
     If using --path, it will download the dataset to the specified path. If no path is provided, it will download to ~/.eotdl/datasets.\n
-    If using --file, it will download the specified file. If no file is provided, it will download the entire dataset.\n
     If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
     If using --assets when the dataset is STAC, it will also download the STAC assets of the dataset. If not provided, it will only download the STAC metadata.\n
     If using --force, it will download the dataset even if the file already exists.\n
@@ -125,7 +128,7 @@ def get(
     Examples\n
     --------\n
     $ eotdl dataset get YourDataset\n
-    $ eotdl dataset get YourDataset --path /path/to/download --
+    $ eotdl dataset get YourDataset --path /path/to/download --version 1 --assets True --force True --verbose True
     """
     try:
         dst_path = stage_dataset(
@@ -143,12 +146,15 @@ def get(
 
 
 @app.command()
-def 
-
+def delete(
+    dataset: str = typer.Argument(None, help="Name of the dataset to deactivate")
 ):
+    """
+    Delete a dataset from the EOTDL.
+    """
     try:
-        deactivate_dataset(
-        typer.echo(f"Dataset {
+        deactivate_dataset(dataset)
+        typer.echo(f"Dataset {dataset} deleted")
     except Exception as e:
         typer.echo(e)
 
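Two behavioral changes here: `list` gains a `--private` flag that calls the new authenticated `retrieve_private_datasets()` (see `eotdl/datasets/retrieve.py` below), and `get` drops the `--file` option, so only whole datasets can be staged. The completed `delete` command forwards the dataset name to `deactivate_dataset`. Usage sketch:

    $ eotdl datasets list --private
    $ eotdl datasets delete YourDataset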
eotdl/commands/models.py
CHANGED
@@ -93,9 +93,6 @@ def get(
     path: str = typer.Option(
         None, "--path", "-p", help="Download the model to a specific output path"
     ),
-    file: str = typer.Option(
-        None, "--file", "-f", help="Download a specific file from the model"
-    ),
     version: int = typer.Option(None, "--version", "-v", help="Model version"),
     assets: bool = typer.Option(
         False, "--assets", "-a", help="Download STAC assets from the model"
@@ -113,7 +110,6 @@ def get(
     Download a model from the EOTDL.
     \n\n
     If using --path, it will download the model to the specified path. If no path is provided, it will download to ~/.eotdl/models.\n
-    If using --file, it will download the specified file. If no file is provided, it will download the entire model.\n
     If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
     If using --assets when the model is STAC, it will also download the STAC assets of the model. If not provided, it will only download the STAC metadata.\n
     If using --force, it will download the model even if the file already exists.\n
@@ -122,7 +118,7 @@ def get(
     Examples\n
     --------\n
     $ eotdl models get YourModel\n
-    $ eotdl models get YourModel --path /path/to/download --
+    $ eotdl models get YourModel --path /path/to/download --version 1 --assets True --force True --verbose True
     """
     try:
         dst_path = stage_model(
@@ -134,12 +130,15 @@ def get(
 
 
 @app.command()
-def 
-
+def delete(
+    model_name: str = typer.Argument(None, help="Name of the model to delete")
 ):
+    """
+    Delete a model from the EOTDL.
+    """
     try:
-        deactivate_model(
-        typer.echo(f"Model {
+        deactivate_model(model_name)
+        typer.echo(f"Model {model_name} deleted")
     except Exception as e:
         typer.echo(e)
 
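The models CLI mirrors the datasets changes: `--file` is removed from `get`, and the completed `delete` command passes the model name to `deactivate_model`:

    $ eotdl models delete YourModel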
eotdl/commands/pipelines.py
ADDED
@@ -0,0 +1,132 @@
+import typer
+from pathlib import Path
+
+from ..fe import (
+    ingest_openeo,
+    retrieve_pipelines,
+    stage_pipeline,
+    deactivate_pipeline,
+)
+
+app = typer.Typer(help="Explore, ingest and download Feature Engineering Pipelines.")
+
+@app.command()
+def ingest(
+    path: Path = typer.Option(..., "--path", "-p", help="Path to the pipeline to ingest"),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        help="Verbose output. This will print the progress of the ingestion",
+    ),
+    foce_metadata_update: bool = typer.Option(
+        False,
+        "--force",
+        "-f",
+        help="Force metadata update even if it already exists. Will overwrite the current metadata in EOTDL",
+    ),
+    sync_metadata: bool = typer.Option(
+        False,
+        "--sync",
+        "-s",
+        help="Sync local metadata with the EOTDL. Will overwrite the local metadata",
+    ),
+):
+    """
+    Ingest a pipeline to the EOTDL.
+
+    This command ingests the pipeline to the EOTDL. The pipeline must be a folder with the pipeline files,
+    and at least a README.md file. All the files in the folder will be uploaded to the EOTDL.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl pipelines ingest --path /path/to/folder-with-pipeline --verbose True
+    """
+    try:
+        ingest_openeo(path, verbose, typer.echo, foce_metadata_update, sync_metadata)
+    except Exception as e:
+        typer.echo(e)
+
+@app.command()
+def list(
+    name: str = typer.Option(
+        None, "--name", "-n", help="Filter the returned pipelines by name"
+    ),
+    limit: int = typer.Option(
+        None, "--limit", "-l", help="Limit the number of returned results"
+    ),
+):
+    """
+    Retrieve a list with all the pipelines in the EOTDL.
+
+    If using --name, it will filter the results by name. If no name is provided, it will return all the pipelines.\n
+    If using --limit, it will limit the number of results. If no limit is provided, it will return all the pipelines.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl pipelines list\n
+    $ eotdl pipelines list --name YourPipeline --limit 5
+    """
+    try:
+        pipelines = retrieve_pipelines(name, limit)
+        typer.echo(pipelines)
+    except Exception as e:
+        typer.echo(e)
+
+@app.command()
+def get(
+    pipeline: str = typer.Argument(None, help="Name of the pipeline to download"),
+    path: str = typer.Option(
+        None, "--path", "-p", help="Download the pipeline to a specific output path"
+    ),
+    version: int = typer.Option(None, "--version", "-v", help="pipeline version"),
+    assets: bool = typer.Option(
+        False, "--assets", "-a", help="Download STAC assets from the pipeline"
+    ),
+    force: bool = typer.Option(
+        False, "--force", "-f", help="Force download even if file exists"
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        help="Verbose output. This will print the progress of the download",
+    ),
+):
+    """
+    Download a pipeline from the EOTDL.
+    \n\n
+    If using --path, it will download the pipeline to the specified path. If no path is provided, it will download to ~/.eotdl/pipelines.\n
+    If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
+    If using --assets when the pipeline is STAC, it will also download the STAC assets of the pipeline. If not provided, it will only download the STAC metadata.\n
+    If using --force, it will download the pipeline even if the file already exists.\n
+    If using --verbose, it will print the progress of the download.
+    \n\n
+    Examples\n
+    --------\n
+    $ eotdl pipelines get Yourpipeline\n
+    $ eotdl pipelines get Yourpipeline --path /path/to/download --version 1 --assets True --force True --verbose True
+    """
+    try:
+        dst_path = stage_pipeline(
+            pipeline, version, path, typer.echo, assets, force, verbose
+        )
+        typer.echo(f"Data available at {dst_path}")
+    except Exception as e:
+        typer.echo(e)
+
+
+@app.command()
+def delete(
+    pipeline_name: str = typer.Argument(None, help="Name of the pipeline to delete")
+):
+    """
+    Delete a model from the EOTDL.
+    """
+    try:
+        deactivate_pipeline(pipeline_name)
+        typer.echo(f"Pipeline {pipeline_name} deleted")
+    except Exception as e:
+        typer.echo(e)
+
+
+if __name__ == "__main__":
+    app()
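Taken together with the `cli.py` change above, the new sub-app gives pipelines the same lifecycle as datasets and models. A hypothetical end-to-end session (names and paths are placeholders):

    $ eotdl pipelines ingest --path /path/to/folder-with-pipeline
    $ eotdl pipelines list --name YourPipeline
    $ eotdl pipelines get YourPipeline --path /tmp/pipelines
    $ eotdl pipelines delete YourPipeline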
eotdl/datasets/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from .retrieve import retrieve_datasets, retrieve_dataset, retrieve_dataset_files
+from .retrieve import retrieve_datasets, retrieve_dataset, retrieve_dataset_files, retrieve_private_datasets
 from .ingest import ingest_dataset, ingest_virtual_dataset, ingest_dataset_catalog
 from .stage import stage_dataset, stage_dataset_file
 from .update import deactivate_dataset
eotdl/datasets/ingest.py
CHANGED
@@ -3,16 +3,26 @@ from pathlib import Path
 from ..repos import DatasetsAPIRepo
 from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest, ingest_virtual, ingest_catalog
 
-def retrieve_dataset(metadata, user):
+def retrieve_dataset(metadata, user, private):
     repo = DatasetsAPIRepo()
     data, error = repo.retrieve_dataset(metadata.name)
     if data and data["uid"] != user["uid"]:
         raise Exception("Dataset already exists.")
-    if error
-
-
-
-
+    if error:
+        if error == "Dataset doesn't exist":
+            # create dataset
+            data, error = repo.create_dataset(metadata.dict(), user, private)
+            # print(data, error)
+            if error:
+                raise Exception(error)
+        elif error == "NoAccessToPrivateError":
+            data, error = repo.retrieve_private_dataset(metadata.name, user)
+            if error:
+                if error == "NoAccessToPrivateError":
+                    raise Exception("Dataset already exists.")
+                else:
+                    raise Exception(error)
+        else:
             raise Exception(error)
     return data
 
@@ -22,6 +32,7 @@ def ingest_dataset(
     logger=print,
     force_metadata_update=False,
     sync_metadata=False,
+    private=False,
 ):
     path = Path(path)
     if not path.is_dir():
@@ -30,7 +41,7 @@ def ingest_dataset(
         prep_ingest_stac(path, logger)
     else:
         prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
-    return ingest(path, DatasetsAPIRepo(), retrieve_dataset, 'datasets')
+    return ingest(path, DatasetsAPIRepo(), retrieve_dataset, 'datasets', private)
 
 
 def ingest_virtual_dataset( # could work for a list of paths with minimal changes...
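A minimal sketch of how the new `private` flag reaches the API from library code, assuming the keyword parameters shown in this hunk (the positional parameters of `ingest_dataset` before `logger` are not visible in the diff):

    from eotdl.datasets import ingest_dataset

    # The first ingest creates the dataset; private=True is forwarded through
    # ingest() into repo.create_dataset(metadata, user, private).
    ingest_dataset("path/to/dataset-folder", private=True)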
eotdl/datasets/retrieve.py
CHANGED
@@ -1,4 +1,5 @@
 from ..repos import DatasetsAPIRepo, FilesAPIRepo
+from ..auth import with_auth
 
 
 def retrieve_datasets(name=None, limit=None):
@@ -10,11 +11,16 @@ def retrieve_datasets(name=None, limit=None):
     return []
 
 
-def retrieve_dataset(name):
+def retrieve_dataset(name, user=None):
     repo = DatasetsAPIRepo()
     data, error = repo.retrieve_dataset(name)
     if error:
-
+        if error == "NoAccessToPrivateError" and user is not None:
+            data, error = repo.retrieve_private_dataset(name, user)
+            if error:
+                raise Exception(error)
+        else:
+            raise Exception(error)
     return data
 
 
@@ -25,3 +31,11 @@ def retrieve_dataset_files(dataset_id, version):
         raise Exception(error)
     return data
 
+@with_auth
+def retrieve_private_datasets(user):
+    api_repo = DatasetsAPIRepo()
+    data, error = api_repo.retrieve_private_datasets(user)
+    if data and not error:
+        datasets = [d["name"] for d in data] if data else []
+        return datasets
+    return []
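`retrieve_private_datasets` is decorated with `@with_auth`, which injects the `user` argument, so callers pass nothing, matching the call in `eotdl/commands/datasets.py` above:

    from eotdl.datasets import retrieve_private_datasets

    names = retrieve_private_datasets()  # list of dataset names, or [] on error
    print(names)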
eotdl/datasets/stage.py
CHANGED
eotdl/datasets/update.py
CHANGED
@@ -1,5 +1,6 @@
 from ..repos import DatasetsAPIRepo
-
+from ..auth import with_auth
+from .retrieve import retrieve_dataset
 
 def update_dataset(dataset_id, metadata, content, user):
     repo = DatasetsAPIRepo()
@@ -16,10 +17,11 @@ def update_dataset(dataset_id, metadata, content, user):
         raise Exception(error)
     return data
 
-
-def deactivate_dataset(
+@with_auth
+def deactivate_dataset(dataset_name, user):
+    dataset = retrieve_dataset(dataset_name)
     repo = DatasetsAPIRepo()
-    data, error = repo.deactivate_dataset(
+    data, error = repo.deactivate_dataset(dataset['id'], user)
     if error:
         raise Exception(error)
     return data
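`deactivate_dataset` now takes a dataset name instead of an id: it resolves the id via `retrieve_dataset` and receives `user` from the `@with_auth` decorator, so the CLI's `delete` command can simply do:

    from eotdl.datasets import deactivate_dataset

    deactivate_dataset("YourDataset")  # user is injected by @with_auth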
eotdl/fe/__init__.py
ADDED
eotdl/fe/ingest.py
ADDED
@@ -0,0 +1,49 @@
+from pathlib import Path
+
+from ..repos import FEAPIRepo
+from ..files.ingest import prep_ingest_folder, ingest
+
+def retrieve_pipeline(metadata, user):
+    repo = FEAPIRepo()
+    data, error = repo.retrieve_pipeline(metadata.name)
+    if data and data["uid"] != user["uid"]:
+        raise Exception("Pipeline already exists.")
+    if error and error == "Pipeline doesn't exist":
+        # create pipeline
+        data, error = repo.create_pipeline(metadata.dict(), user)
+        # print(data, error)
+        if error:
+            raise Exception(error)
+    return data
+
+def ingest_openeo(
+    path,
+    verbose=False,
+    logger=print,
+    force_metadata_update=False,
+    sync_metadata=False,
+    private=False,
+):
+    path = Path(path)
+    if not path.is_dir():
+        raise Exception("Path must be a folder")
+    # if "catalog.json" in [f.name for f in path.iterdir()]:
+    #     prep_ingest_stac(path, logger)
+    # else:
+    #     prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
+    prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
+    return ingest(path, FEAPIRepo(), retrieve_pipeline, 'pipelines', private)
+
+
+# def ingest_virtual_dataset( # could work for a list of paths with minimal changes...
+#     path,
+#     links,
+#     metadata = None,
+#     logger=print,
+#     user=None,
+# ):
+#     return ingest_virtual(path, links, DatasetsAPIRepo(), retrieve_dataset, 'datasets', metadata, logger)
+
+# def ingest_dataset_catalog(path, logger=None):
+#     path = Path(path)
+#     return ingest_catalog(path, DatasetsAPIRepo(), retrieve_dataset, 'datasets')
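`ingest_openeo` is the library entry point behind `eotdl pipelines ingest`. Unlike `ingest_dataset`, the STAC branch is commented out, so only plain folders are accepted for now. A minimal sketch:

    from eotdl.fe import ingest_openeo

    ingest_openeo("path/to/folder-with-pipeline", verbose=True)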
eotdl/fe/openeo/advanced_patch_extraction.py
ADDED
@@ -0,0 +1,124 @@
+from .dataframe_utils import *
+import openeo
+from openeo.extra.job_management import MultiBackendJobManager, CsvJobDatabase
+from .s3proxy_utils import upload_geoparquet_file
+import geojson
+
+def start_job(row: pd.Series, connection: openeo.Connection, **kwargs) -> openeo.BatchJob:
+
+    temporal_extent = eval(row["temporal_extent"])
+    crs = row['crs']
+
+    # set up load url in order to allow non-latlon feature collections for spatial filtering
+    geometry = geojson.loads(row["geometry"])
+    features = gpd.GeoDataFrame.from_features(geometry).set_crs(crs)
+    url = upload_geoparquet_file(features,connection)
+
+    #run the s1 and s2 udp
+    s1 = connection.datacube_from_process(
+        "s1_weekly_statistics",
+        namespace="https://raw.githubusercontent.com/earthpulse/eotdl/refs/heads/hv_openeoexample/tutorials/notebooks/openeo/s1_weekly_statistics.json",
+        temporal_extent=temporal_extent,
+    )
+
+    s2 = connection.datacube_from_process(
+        "s2_weekly_statistics",
+        namespace="https://raw.githubusercontent.com/earthpulse/eotdl/refs/heads/hv_openeoexample/tutorials/notebooks/openeo/s2_weekly_statistics.json",
+        temporal_extent=temporal_extent,
+    )
+
+    #merge both cubes and filter across the feature collection
+    merged = s2.merge_cubes(s1)
+    result = merged.filter_spatial(connection.load_url(url, format="Parquet"))
+
+    #dedicated job settings to save the individual features within a collection seperately
+    job = result.create_job(
+        out_format="NetCDF",
+        sample_by_feature = True,
+        feature_id_property="id",
+        filename_prefix = "eotdl"
+    )
+
+    return job
+
+def patch_extraction(
+    gdf,
+    start_date,
+    nb_months,
+    pixel_size = 64,
+    resolution = 10,
+    max_points = 5,
+    job_tracker = 'jobs.csv',
+    parallel_jobs=2
+):
+    """
+    # Transform GeoDataFrame for MultiBackendJobManager
+
+    This function processes an input GeoDataFrame and prepares it for use with openEO's **MultiBackendJobManager**. The job manager enables launching and tracking multiple openEO jobs simultaneously, which is essential for large-scale data extractions.
+
+    ### Example Use Case
+    The function creates patches (e.g., 64x64 pixels) around polygon centers. These patches are suitable for machine learning applications, such as training convolutional neural networks (CNNs).
+    By combining patches into Sentinel-2 grid collections, the workflow ensures cost efficiency and optimized data extraction.
+
+    ### Workflow
+
+    1. **Process the GeoDataFrame**
+       - Create patches with a fixed size around the center of polygon geometries.
+       - Calculate temporal extents for each geometry.
+
+    2. **Combine Features Using Sentinel-2 Tiling**
+       - Group buffered geometries into collections based on the Sentinel-2 tiling grid.
+       - Minimize redundant openEO cost.
+
+    3. **Generate Job Metadata DataFrame**
+       - Convert processed data into a DataFrame, ready for the MultiBackendJobManager.
+
+    ### Parameters
+
+    #### Spatial Parameters:
+    - **Buffer Distance:** Buffer size (e.g., 320 meters for a 64x64 patch around polygon centers).
+    - **Resolution:** Spatial alignment resolution in meters.
+
+    #### Temporal Parameters:
+    - **Start Date:** Start of the temporal extent (e.g., `"2020-01-01"`).
+    - **Number of Months:** Duration of the temporal extent in months.
+
+    #### Job Splitting Parameters:
+    - **Max Points Per Job:** Maximum number of features per job batch.
+    """
+    job_df = process_and_create_advanced_patch_jobs(
+        gdf, start_date, nb_months, pixel_size, resolution, max_points=max_points
+    )
+    """
+    # Start Job with Standardized UDPs and Feature Collection Filtering
+
+    This function initializes an openEO batch job using standardized **User-Defined Processes (UDPs)** for Sentinel-1 and Sentinel-2 data processing. It employs a spatial filter designed for non-lat/lon feature collections to ensure precise patch sizes in UTM coordinates.
+
+    ### Key Features
+
+    1. **Use of Standardized UDPs**
+       - **S1 Weekly Statistics:** Computes weekly statistics from Sentinel-1 data.
+       - **S2 Weekly Statistics:** Computes weekly statistics from Sentinel-2 data.
+       - UDPs are defined in external JSON files.
+
+    2. **Spatial Filtering with `load_url`**
+       - Accepts feature collections in **UTM coordinates** to guarantee patches with exact dimensions (e.g., 64x64 meters).
+       - Features are uploaded as a GeoParquet file to a Creodias S3 bucket, enabling spatial filtering directly on the server.
+
+    3. **Cube Merging**
+       - Merges Sentinel-1 and Sentinel-2 datacubes for combined analysis.
+
+    4. **Job Configuration**
+       - Saves each feature in the collection as a separate file.
+       - Outputs results in **NetCDF** format with filenames derived
+    """
+    # Authenticate and add the backend
+    connection = openeo.connect(url="openeo.dataspace.copernicus.eu").authenticate_oidc()
+    # initialize the job manager
+    manager = MultiBackendJobManager()
+    manager.add_backend("cdse", connection=connection, parallel_jobs=parallel_jobs)
+    job_db = CsvJobDatabase(path=job_tracker)
+    if not job_db.exists():
+        df = manager._normalize_df(job_df)
+        job_db.persist(df)
+    manager.run_jobs(start_job=start_job, job_db=job_db)
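A usage sketch for `patch_extraction`, assuming it is exported from `eotdl.fe.openeo` (the new `__init__.py` contents are not shown in this diff) and that the input GeoDataFrame carries the polygon geometries expected by `process_and_create_advanced_patch_jobs`:

    import geopandas as gpd
    from eotdl.fe.openeo import patch_extraction  # assumed import path

    gdf = gpd.read_file("fields.geojson")  # polygons to extract patches around
    patch_extraction(
        gdf,
        start_date="2020-01-01",  # start of the temporal extent
        nb_months=3,              # duration in months
        pixel_size=64,            # 64x64-pixel patches
        resolution=10,            # meters
        max_points=5,             # max features per job batch
        job_tracker="jobs.csv",   # CsvJobDatabase path for job tracking
        parallel_jobs=2,          # concurrent openEO jobs on the CDSE backend
    )
    # Note: the function opens an OIDC login to the Copernicus Data Space backend
    # (openeo.connect(...).authenticate_oidc()) before submitting jobs.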