eotdl 2025.4.22.post2__py3-none-any.whl → 2025.5.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eotdl/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = "2025.04.22-2"
+ __version__ = "2025.05.26"
eotdl/cli.py CHANGED
@@ -1,7 +1,7 @@
  import typer
  import os

- from .commands import auth, datasets, models, stac
+ from .commands import auth, datasets, models, stac, pipelines
  from .repos import APIRepo
  from . import __version__

@@ -11,6 +11,7 @@ app.add_typer(auth.app, name="auth")
  app.add_typer(datasets.app, name="datasets")
  app.add_typer(models.app, name="models")
  app.add_typer(stac.app, name="stac")
+ app.add_typer(pipelines.app, name="pipelines")

  @app.command()
  def version():
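
With the sub-app registered, the new command group is exposed by the CLI (a usage sketch, following the `$ eotdl ...` convention used in the command docstrings below):

$ eotdl pipelines --help
$ eotdl pipelines list
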
eotdl/commands/auth.py CHANGED
@@ -28,7 +28,9 @@ def login():


  @app.command()
- def logout():
+ def logout(
+     yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation")
+ ):
      """
      Logout from the EOTDL.

@@ -37,7 +39,8 @@ def logout():
      user = is_logged()
      if user:
          typer.echo(f"You are logged in as {user['email']}")
-         typer.confirm("Are you sure you want to logout?", abort=True)
+         if not yes:
+             typer.confirm("Are you sure you want to logout?", abort=True)
      logout_url = logout_user()
      typer.echo("You are logged out.")
      typer.echo(
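
The new `--yes` flag makes logout usable non-interactively, e.g. in scripts (usage sketch):

$ eotdl auth logout --yes
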
eotdl/commands/datasets.py CHANGED
@@ -6,6 +6,7 @@ from ..datasets import (
      ingest_dataset,
      stage_dataset,
      deactivate_dataset,
+     retrieve_private_datasets
  )

  app = typer.Typer(help="Explore, ingest and download training datasets.")
@@ -72,6 +73,9 @@ def list(
      limit: int = typer.Option(
          None, "--limit", "-l", help="Limit the number of returned results"
      ),
+     private: bool = typer.Option(
+         False, "--private", "-p", help="Show private datasets"
+     ),
  ):
      """
      Retrieve a list with all the datasets in the EOTDL.
@@ -85,7 +89,10 @@ def list(
      $ eotdl datasets list --name YourModel --limit 5
      """
      try:
-         datasets = retrieve_datasets(name, limit)
+         if private:
+             datasets = retrieve_private_datasets()
+         else:
+             datasets = retrieve_datasets(name, limit)
          typer.echo(datasets)
      except Exception as e:
          typer.echo(e)
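
Usage sketch for the new flag (requires a logged-in user, since `retrieve_private_datasets` is wrapped in `with_auth`; see eotdl/datasets/retrieve.py below):

$ eotdl datasets list --private
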
@@ -96,9 +103,6 @@ def get(
      path: str = typer.Option(
          None, "--path", "-p", help="Download the dataset to a specific output path"
      ),
-     file: str = typer.Option(
-         None, "--file", "-f", help="Download a specific file from the dataset"
-     ),
      version: int = typer.Option(None, "--version", "-v", help="Dataset version"),
      assets: bool = typer.Option(
          False, "--assets", "-a", help="Download STAC assets from the dataset"
@@ -116,7 +120,6 @@ def get(
      Download a dataset from the EOTDL.
      \n\n
      If using --path, it will download the dataset to the specified path. If no path is provided, it will download to ~/.eotdl/datasets.\n
-     If using --file, it will download the specified file. If no file is provided, it will download the entire dataset.\n
      If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
      If using --assets when the dataset is STAC, it will also download the STAC assets of the dataset. If not provided, it will only download the STAC metadata.\n
      If using --force, it will download the dataset even if the file already exists.\n
@@ -125,7 +128,7 @@ def get(
      Examples\n
      --------\n
      $ eotdl dataset get YourDataset\n
-     $ eotdl dataset get YourDataset --path /path/to/download --file dataset.zip --version 1 --assets True --force True --verbose True
+     $ eotdl dataset get YourDataset --path /path/to/download --version 1 --assets True --force True --verbose True
      """
      try:
          dst_path = stage_dataset(
@@ -143,12 +146,15 @@


  @app.command()
- def deactivate(
-     dataset_id: str = typer.Argument(None, help="ID of the dataset to deactivate")
+ def delete(
+     dataset: str = typer.Argument(None, help="Name of the dataset to deactivate")
  ):
+     """
+     Delete a dataset from the EOTDL.
+     """
      try:
-         deactivate_dataset(dataset_id)
-         typer.echo(f"Dataset {dataset_id} deactivated")
+         deactivate_dataset(dataset)
+         typer.echo(f"Dataset {dataset} deleted")
      except Exception as e:
          typer.echo(e)

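The renamed command now takes the dataset name instead of an ID (usage sketch):

$ eotdl datasets delete YourDataset
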
eotdl/commands/models.py CHANGED
@@ -93,9 +93,6 @@ def get(
      path: str = typer.Option(
          None, "--path", "-p", help="Download the model to a specific output path"
      ),
-     file: str = typer.Option(
-         None, "--file", "-f", help="Download a specific file from the model"
-     ),
      version: int = typer.Option(None, "--version", "-v", help="Model version"),
      assets: bool = typer.Option(
          False, "--assets", "-a", help="Download STAC assets from the model"
@@ -113,7 +110,6 @@ def get(
      Download a model from the EOTDL.
      \n\n
      If using --path, it will download the model to the specified path. If no path is provided, it will download to ~/.eotdl/models.\n
-     If using --file, it will download the specified file. If no file is provided, it will download the entire model.\n
      If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
      If using --assets when the model is STAC, it will also download the STAC assets of the model. If not provided, it will only download the STAC metadata.\n
      If using --force, it will download the model even if the file already exists.\n
@@ -122,7 +118,7 @@ def get(
      Examples\n
      --------\n
      $ eotdl models get YourModel\n
-     $ eotdl models get YourModel --path /path/to/download --file model.zip --version 1 --assets True --force True --verbose True
+     $ eotdl models get YourModel --path /path/to/download --version 1 --assets True --force True --verbose True
      """
      try:
          dst_path = stage_model(
@@ -134,12 +130,15 @@


  @app.command()
- def deactivate(
-     model_id: str = typer.Argument(None, help="ID of the model to deactivate")
+ def delete(
+     model_name: str = typer.Argument(None, help="Name of the model to delete")
  ):
+     """
+     Delete a model from the EOTDL.
+     """
      try:
-         deactivate_model(model_id)
-         typer.echo(f"Model {model_id} deactivated")
+         deactivate_model(model_name)
+         typer.echo(f"Model {model_name} deleted")
      except Exception as e:
          typer.echo(e)

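As with datasets, deletion is now by name (usage sketch):

$ eotdl models delete YourModel
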
eotdl/commands/pipelines.py ADDED
@@ -0,0 +1,132 @@
+ import typer
+ from pathlib import Path
+
+ from ..fe import (
+     ingest_openeo,
+     retrieve_pipelines,
+     stage_pipeline,
+     deactivate_pipeline,
+ )
+
+ app = typer.Typer(help="Explore, ingest and download Feature Engineering Pipelines.")
+
+ @app.command()
+ def ingest(
+     path: Path = typer.Option(..., "--path", "-p", help="Path to the pipeline to ingest"),
+     verbose: bool = typer.Option(
+         False,
+         "--verbose",
+         help="Verbose output. This will print the progress of the ingestion",
+     ),
+     force_metadata_update: bool = typer.Option(
+         False,
+         "--force",
+         "-f",
+         help="Force metadata update even if it already exists. Will overwrite the current metadata in EOTDL",
+     ),
+     sync_metadata: bool = typer.Option(
+         False,
+         "--sync",
+         "-s",
+         help="Sync local metadata with the EOTDL. Will overwrite the local metadata",
+     ),
+ ):
+     """
+     Ingest a pipeline to the EOTDL.
+
+     This command ingests the pipeline to the EOTDL. The pipeline must be a folder with the pipeline files,
+     and at least a README.md file. All the files in the folder will be uploaded to the EOTDL.
+     \n\n
+     Examples\n
+     --------\n
+     $ eotdl pipelines ingest --path /path/to/folder-with-pipeline --verbose True
+     """
+     try:
+         ingest_openeo(path, verbose, typer.echo, force_metadata_update, sync_metadata)
+     except Exception as e:
+         typer.echo(e)
+
+ @app.command()
+ def list(
+     name: str = typer.Option(
+         None, "--name", "-n", help="Filter the returned pipelines by name"
+     ),
+     limit: int = typer.Option(
+         None, "--limit", "-l", help="Limit the number of returned results"
+     ),
+ ):
+     """
+     Retrieve a list with all the pipelines in the EOTDL.
+
+     If using --name, it will filter the results by name. If no name is provided, it will return all the pipelines.\n
+     If using --limit, it will limit the number of results. If no limit is provided, it will return all the pipelines.
+     \n\n
+     Examples\n
+     --------\n
+     $ eotdl pipelines list\n
+     $ eotdl pipelines list --name YourPipeline --limit 5
+     """
+     try:
+         pipelines = retrieve_pipelines(name, limit)
+         typer.echo(pipelines)
+     except Exception as e:
+         typer.echo(e)
+
+ @app.command()
+ def get(
+     pipeline: str = typer.Argument(None, help="Name of the pipeline to download"),
+     path: str = typer.Option(
+         None, "--path", "-p", help="Download the pipeline to a specific output path"
+     ),
+     version: int = typer.Option(None, "--version", "-v", help="Pipeline version"),
+     assets: bool = typer.Option(
+         False, "--assets", "-a", help="Download STAC assets from the pipeline"
+     ),
+     force: bool = typer.Option(
+         False, "--force", "-f", help="Force download even if file exists"
+     ),
+     verbose: bool = typer.Option(
+         False,
+         "--verbose",
+         help="Verbose output. This will print the progress of the download",
+     ),
+ ):
+     """
+     Download a pipeline from the EOTDL.
+     \n\n
+     If using --path, it will download the pipeline to the specified path. If no path is provided, it will download to ~/.eotdl/pipelines.\n
+     If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
+     If using --assets when the pipeline is STAC, it will also download the STAC assets of the pipeline. If not provided, it will only download the STAC metadata.\n
+     If using --force, it will download the pipeline even if the file already exists.\n
+     If using --verbose, it will print the progress of the download.
+     \n\n
+     Examples\n
+     --------\n
+     $ eotdl pipelines get YourPipeline\n
+     $ eotdl pipelines get YourPipeline --path /path/to/download --version 1 --assets True --force True --verbose True
+     """
+     try:
+         dst_path = stage_pipeline(
+             pipeline, version, path, typer.echo, assets, force, verbose
+         )
+         typer.echo(f"Data available at {dst_path}")
+     except Exception as e:
+         typer.echo(e)
+
+
+ @app.command()
+ def delete(
+     pipeline_name: str = typer.Argument(None, help="Name of the pipeline to delete")
+ ):
+     """
+     Delete a pipeline from the EOTDL.
+     """
+     try:
+         deactivate_pipeline(pipeline_name)
+         typer.echo(f"Pipeline {pipeline_name} deleted")
+     except Exception as e:
+         typer.echo(e)
+
+
+ if __name__ == "__main__":
+     app()
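
The same operations are available programmatically via the `eotdl.fe` module the commands import from. A minimal sketch based on the signatures above (assumes the pipeline exists and, where needed, a logged-in user):

from eotdl.fe import retrieve_pipelines, stage_pipeline

# list up to 5 pipelines, optionally filtered by name
print(retrieve_pipelines(None, 5))

# stage the latest version to the default path; positional arguments
# mirror the CLI call: pipeline, version, path, logger, assets, force, verbose
dst_path = stage_pipeline("YourPipeline", None, None, print, False, False, False)
print(f"Data available at {dst_path}")
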
eotdl/datasets/__init__.py CHANGED
@@ -1,4 +1,4 @@
- from .retrieve import retrieve_datasets, retrieve_dataset, retrieve_dataset_files
+ from .retrieve import retrieve_datasets, retrieve_dataset, retrieve_dataset_files, retrieve_private_datasets
  from .ingest import ingest_dataset, ingest_virtual_dataset, ingest_dataset_catalog
  from .stage import stage_dataset, stage_dataset_file
  from .update import deactivate_dataset
eotdl/datasets/ingest.py CHANGED
@@ -3,16 +3,26 @@ from pathlib import Path
  from ..repos import DatasetsAPIRepo
  from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest, ingest_virtual, ingest_catalog

- def retrieve_dataset(metadata, user):
+ def retrieve_dataset(metadata, user, private):
      repo = DatasetsAPIRepo()
      data, error = repo.retrieve_dataset(metadata.name)
      if data and data["uid"] != user["uid"]:
          raise Exception("Dataset already exists.")
-     if error and error == "Dataset doesn't exist":
-         # create dataset
-         data, error = repo.create_dataset(metadata.dict(), user)
-         # print(data, error)
-         if error:
+     if error:
+         if error == "Dataset doesn't exist":
+             # create dataset
+             data, error = repo.create_dataset(metadata.dict(), user, private)
+             # print(data, error)
+             if error:
+                 raise Exception(error)
+         elif error == "NoAccessToPrivateError":
+             data, error = repo.retrieve_private_dataset(metadata.name, user)
+             if error:
+                 if error == "NoAccessToPrivateError":
+                     raise Exception("Dataset already exists.")
+                 else:
+                     raise Exception(error)
+         else:
              raise Exception(error)
      return data

@@ -22,6 +32,7 @@ def ingest_dataset(
      logger=print,
      force_metadata_update=False,
      sync_metadata=False,
+     private=False,
  ):
      path = Path(path)
      if not path.is_dir():
@@ -30,7 +41,7 @@
          prep_ingest_stac(path, logger)
      else:
          prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
-     return ingest(path, DatasetsAPIRepo(), retrieve_dataset, 'datasets')
+     return ingest(path, DatasetsAPIRepo(), retrieve_dataset, 'datasets', private)


  def ingest_virtual_dataset( # could work for a list of paths with minimal changes...
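
The new `private` flag threads from `ingest_dataset` through `ingest` to `repo.create_dataset`, so a private ingestion from Python is simply (a sketch, assuming a prepared dataset folder with metadata):

from eotdl.datasets import ingest_dataset

ingest_dataset("/path/to/folder-with-dataset", private=True)
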
eotdl/datasets/retrieve.py CHANGED
@@ -1,4 +1,5 @@
  from ..repos import DatasetsAPIRepo, FilesAPIRepo
+ from ..auth import with_auth


  def retrieve_datasets(name=None, limit=None):
@@ -10,11 +11,16 @@ def retrieve_datasets(name=None, limit=None):
      return []


- def retrieve_dataset(name):
+ def retrieve_dataset(name, user=None):
      repo = DatasetsAPIRepo()
      data, error = repo.retrieve_dataset(name)
      if error:
-         raise Exception(error)
+         if error == "NoAccessToPrivateError" and user is not None:
+             data, error = repo.retrieve_private_dataset(name, user)
+             if error:
+                 raise Exception(error)
+         else:
+             raise Exception(error)
      return data


@@ -25,3 +31,11 @@ def retrieve_dataset_files(dataset_id, version):
          raise Exception(error)
      return data

+ @with_auth
+ def retrieve_private_datasets(user):
+     api_repo = DatasetsAPIRepo()
+     data, error = api_repo.retrieve_private_datasets(user)
+     if data and not error:
+         datasets = [d["name"] for d in data] if data else []
+         return datasets
+     return []
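
`retrieve_private_datasets(user)` is called from the CLI with no arguments, so `with_auth` evidently resolves the logged-in user and injects it. A minimal sketch of that decorator pattern, for illustration only (not the actual `eotdl.auth` implementation):

from functools import wraps

def with_auth(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        # assumed: is_logged() is the same helper commands/auth.py uses
        user = is_logged()
        if not user:
            raise Exception("You are not logged in.")
        return func(*args, user=user, **kwargs)
    return wrapper
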
eotdl/datasets/stage.py CHANGED
@@ -20,7 +20,7 @@ def stage_dataset(
      user=None,
      file=None,
  ):
-     dataset = retrieve_dataset(dataset_name)
+     dataset = retrieve_dataset(dataset_name, user)
      if version is None:
          version = sorted([v['version_id'] for v in dataset["versions"]])[-1]
      else:
eotdl/datasets/update.py CHANGED
@@ -1,5 +1,6 @@
  from ..repos import DatasetsAPIRepo
-
+ from ..auth import with_auth
+ from .retrieve import retrieve_dataset

  def update_dataset(dataset_id, metadata, content, user):
      repo = DatasetsAPIRepo()
@@ -16,10 +17,11 @@ def update_dataset(dataset_id, metadata, content, user):
          raise Exception(error)
      return data

-
- def deactivate_dataset(dataset_id):
+ @with_auth
+ def deactivate_dataset(dataset_name, user):
+     dataset = retrieve_dataset(dataset_name)
      repo = DatasetsAPIRepo()
-     data, error = repo.deactivate_dataset(dataset_id)
+     data, error = repo.deactivate_dataset(dataset['id'], user)
      if error:
          raise Exception(error)
      return data
eotdl/fe/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .ingest import ingest_openeo
+ from .retrieve import retrieve_pipelines, retrieve_pipeline
+ from .stage import stage_pipeline
+ from .update import deactivate_pipeline
eotdl/fe/ingest.py ADDED
@@ -0,0 +1,49 @@
+ from pathlib import Path
+
+ from ..repos import FEAPIRepo
+ from ..files.ingest import prep_ingest_folder, ingest
+
+ def retrieve_pipeline(metadata, user):
+     repo = FEAPIRepo()
+     data, error = repo.retrieve_pipeline(metadata.name)
+     if data and data["uid"] != user["uid"]:
+         raise Exception("Pipeline already exists.")
+     if error and error == "Pipeline doesn't exist":
+         # create pipeline
+         data, error = repo.create_pipeline(metadata.dict(), user)
+         # print(data, error)
+         if error:
+             raise Exception(error)
+     return data
+
+ def ingest_openeo(
+     path,
+     verbose=False,
+     logger=print,
+     force_metadata_update=False,
+     sync_metadata=False,
+     private=False,
+ ):
+     path = Path(path)
+     if not path.is_dir():
+         raise Exception("Path must be a folder")
+     # if "catalog.json" in [f.name for f in path.iterdir()]:
+     #     prep_ingest_stac(path, logger)
+     # else:
+     #     prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
+     prep_ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
+     return ingest(path, FEAPIRepo(), retrieve_pipeline, 'pipelines', private)
+
+
+ # def ingest_virtual_dataset( # could work for a list of paths with minimal changes...
+ #     path,
+ #     links,
+ #     metadata = None,
+ #     logger=print,
+ #     user=None,
+ # ):
+ #     return ingest_virtual(path, links, DatasetsAPIRepo(), retrieve_dataset, 'datasets', metadata, logger)
+
+ # def ingest_dataset_catalog(path, logger=None):
+ #     path = Path(path)
+ #     return ingest_catalog(path, DatasetsAPIRepo(), retrieve_dataset, 'datasets')
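
Calling the ingest entry point directly mirrors `eotdl pipelines ingest` (a sketch; per the CLI docstring the folder must contain the pipeline files and at least a README.md):

from eotdl.fe import ingest_openeo

ingest_openeo("/path/to/folder-with-pipeline", verbose=True)
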
@@ -0,0 +1,2 @@
+ from .basic_point_extraction import point_extraction
+ from .advanced_patch_extraction import patch_extraction
@@ -0,0 +1,124 @@
+ from .dataframe_utils import *
+ import openeo
+ from openeo.extra.job_management import MultiBackendJobManager, CsvJobDatabase
+ from .s3proxy_utils import upload_geoparquet_file
+ import geojson
+
+ def start_job(row: pd.Series, connection: openeo.Connection, **kwargs) -> openeo.BatchJob:
+
+     temporal_extent = eval(row["temporal_extent"])
+     crs = row['crs']
+
+     # set up load_url in order to allow non-latlon feature collections for spatial filtering
+     geometry = geojson.loads(row["geometry"])
+     features = gpd.GeoDataFrame.from_features(geometry).set_crs(crs)
+     url = upload_geoparquet_file(features, connection)
+
+     # run the S1 and S2 UDPs
+     s1 = connection.datacube_from_process(
+         "s1_weekly_statistics",
+         namespace="https://raw.githubusercontent.com/earthpulse/eotdl/refs/heads/hv_openeoexample/tutorials/notebooks/openeo/s1_weekly_statistics.json",
+         temporal_extent=temporal_extent,
+     )
+
+     s2 = connection.datacube_from_process(
+         "s2_weekly_statistics",
+         namespace="https://raw.githubusercontent.com/earthpulse/eotdl/refs/heads/hv_openeoexample/tutorials/notebooks/openeo/s2_weekly_statistics.json",
+         temporal_extent=temporal_extent,
+     )
+
+     # merge both cubes and filter across the feature collection
+     merged = s2.merge_cubes(s1)
+     result = merged.filter_spatial(connection.load_url(url, format="Parquet"))
+
+     # dedicated job settings to save the individual features within a collection separately
+     job = result.create_job(
+         out_format="NetCDF",
+         sample_by_feature=True,
+         feature_id_property="id",
+         filename_prefix="eotdl",
+     )
+
+     return job
+
+ def patch_extraction(
+     gdf,
+     start_date,
+     nb_months,
+     pixel_size=64,
+     resolution=10,
+     max_points=5,
+     job_tracker='jobs.csv',
+     parallel_jobs=2,
+ ):
+     """
+     # Transform GeoDataFrame for MultiBackendJobManager
+
+     This function processes an input GeoDataFrame and prepares it for use with openEO's **MultiBackendJobManager**. The job manager enables launching and tracking multiple openEO jobs simultaneously, which is essential for large-scale data extractions.
+
+     ### Example Use Case
+     The function creates patches (e.g., 64x64 pixels) around polygon centers. These patches are suitable for machine learning applications, such as training convolutional neural networks (CNNs).
+     By combining patches into Sentinel-2 grid collections, the workflow ensures cost efficiency and optimized data extraction.
+
+     ### Workflow
+
+     1. **Process the GeoDataFrame**
+        - Create patches with a fixed size around the center of polygon geometries.
+        - Calculate temporal extents for each geometry.
+
+     2. **Combine Features Using Sentinel-2 Tiling**
+        - Group buffered geometries into collections based on the Sentinel-2 tiling grid.
+        - Minimize redundant openEO cost.
+
+     3. **Generate Job Metadata DataFrame**
+        - Convert processed data into a DataFrame, ready for the MultiBackendJobManager.
+
+     ### Parameters
+
+     #### Spatial Parameters:
+     - **Buffer Distance:** Buffer size (e.g., 320 meters for a 64x64 patch around polygon centers).
+     - **Resolution:** Spatial alignment resolution in meters.
+
+     #### Temporal Parameters:
+     - **Start Date:** Start of the temporal extent (e.g., `"2020-01-01"`).
+     - **Number of Months:** Duration of the temporal extent in months.
+
+     #### Job Splitting Parameters:
+     - **Max Points Per Job:** Maximum number of features per job batch.
+     """
+     job_df = process_and_create_advanced_patch_jobs(
+         gdf, start_date, nb_months, pixel_size, resolution, max_points=max_points
+     )
+     """
+     # Start Job with Standardized UDPs and Feature Collection Filtering
+
+     This function initializes an openEO batch job using standardized **User-Defined Processes (UDPs)** for Sentinel-1 and Sentinel-2 data processing. It employs a spatial filter designed for non-lat/lon feature collections to ensure precise patch sizes in UTM coordinates.
+
+     ### Key Features
+
+     1. **Use of Standardized UDPs**
+        - **S1 Weekly Statistics:** Computes weekly statistics from Sentinel-1 data.
+        - **S2 Weekly Statistics:** Computes weekly statistics from Sentinel-2 data.
+        - UDPs are defined in external JSON files.
+
+     2. **Spatial Filtering with `load_url`**
+        - Accepts feature collections in **UTM coordinates** to guarantee patches with exact dimensions (e.g., 64x64 meters).
+        - Features are uploaded as a GeoParquet file to a CreoDIAS S3 bucket, enabling spatial filtering directly on the server.
+
+     3. **Cube Merging**
+        - Merges Sentinel-1 and Sentinel-2 datacubes for combined analysis.
+
+     4. **Job Configuration**
+        - Saves each feature in the collection as a separate file.
+        - Outputs results in **NetCDF** format with filenames derived from the `filename_prefix` and feature IDs.
+     """
+     # Authenticate and add the backend
+     connection = openeo.connect(url="openeo.dataspace.copernicus.eu").authenticate_oidc()
+     # initialize the job manager
+     manager = MultiBackendJobManager()
+     manager.add_backend("cdse", connection=connection, parallel_jobs=parallel_jobs)
+     job_db = CsvJobDatabase(path=job_tracker)
+     if not job_db.exists():
+         df = manager._normalize_df(job_df)
+         job_db.persist(df)
+     manager.run_jobs(start_job=start_job, job_db=job_db)
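
A hedged usage sketch for the new entry point. The exact columns `process_and_create_advanced_patch_jobs` expects come from `dataframe_utils`, which is not shown in this diff, so the input below is an assumption: a GeoDataFrame of labelled polygons with its CRS set. The import path is likewise assumed from the package layout.

import geopandas as gpd

from eotdl.fe.openeo import patch_extraction  # assumed module path

gdf = gpd.read_file("samples.geojson")  # hypothetical input polygons

patch_extraction(
    gdf,
    start_date="2020-01-01",  # start of the temporal extent
    nb_months=3,              # duration in months
    pixel_size=64,            # 64x64-pixel patches
    resolution=10,            # 10 m spatial alignment
    max_points=5,             # features per job batch
    job_tracker="jobs.csv",   # CSV job database for MultiBackendJobManager
    parallel_jobs=2,
)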