eotdl 2024.10.7__tar.gz → 2025.3.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {eotdl-2024.10.7 → eotdl-2025.3.25}/PKG-INFO +5 -4
  2. {eotdl-2024.10.7 → eotdl-2025.3.25}/README.md +2 -2
  3. eotdl-2025.3.25/eotdl/__init__.py +1 -0
  4. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/search.py +0 -2
  5. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/sentinelhub/parameters.py +1 -1
  6. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/cli.py +2 -2
  7. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/commands/datasets.py +28 -31
  8. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/commands/models.py +27 -30
  9. eotdl-2025.3.25/eotdl/commands/stac.py +57 -0
  10. eotdl-2025.3.25/eotdl/curation/__init__.py +0 -0
  11. eotdl-2025.3.25/eotdl/curation/stac/__init__.py +1 -0
  12. eotdl-2025.3.25/eotdl/curation/stac/api.py +58 -0
  13. eotdl-2025.3.25/eotdl/curation/stac/stac.py +33 -0
  14. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/datasets/__init__.py +1 -1
  15. eotdl-2025.3.25/eotdl/datasets/ingest.py +36 -0
  16. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/datasets/retrieve.py +0 -9
  17. eotdl-2025.3.25/eotdl/datasets/stage.py +64 -0
  18. eotdl-2025.3.25/eotdl/files/__init__.py +0 -0
  19. eotdl-2024.10.7/eotdl/files/ingest.py → eotdl-2025.3.25/eotdl/files/ingest.bck +0 -0
  20. eotdl-2025.3.25/eotdl/files/ingest.py +243 -0
  21. {eotdl-2024.10.7/eotdl/datasets → eotdl-2025.3.25/eotdl/files}/metadata.py +16 -17
  22. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/models/__init__.py +1 -1
  23. eotdl-2025.3.25/eotdl/models/ingest.py +34 -0
  24. eotdl-2025.3.25/eotdl/models/stage.py +60 -0
  25. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/repos/APIRepo.py +1 -1
  26. eotdl-2025.3.25/eotdl/repos/DatasetsAPIRepo.py +93 -0
  27. eotdl-2025.3.25/eotdl/repos/FilesAPIRepo.py +293 -0
  28. eotdl-2025.3.25/eotdl/repos/STACAPIRepo.py +40 -0
  29. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/repos/__init__.py +1 -0
  30. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/tools/geo_utils.py +7 -2
  31. {eotdl-2024.10.7 → eotdl-2025.3.25}/pyproject.toml +1 -1
  32. eotdl-2024.10.7/eotdl/__init__.py +0 -1
  33. eotdl-2024.10.7/eotdl/curation/__init__.py +0 -8
  34. eotdl-2024.10.7/eotdl/curation/stac/__init__.py +0 -8
  35. eotdl-2024.10.7/eotdl/curation/stac/assets.py +0 -110
  36. eotdl-2024.10.7/eotdl/curation/stac/dataframe.py +0 -172
  37. eotdl-2024.10.7/eotdl/curation/stac/dataframe_bck.py +0 -253
  38. eotdl-2024.10.7/eotdl/curation/stac/dataframe_labeling.py +0 -63
  39. eotdl-2024.10.7/eotdl/curation/stac/extensions/__init__.py +0 -23
  40. eotdl-2024.10.7/eotdl/curation/stac/extensions/base.py +0 -30
  41. eotdl-2024.10.7/eotdl/curation/stac/extensions/dem.py +0 -18
  42. eotdl-2024.10.7/eotdl/curation/stac/extensions/eo.py +0 -117
  43. eotdl-2024.10.7/eotdl/curation/stac/extensions/label/__init__.py +0 -7
  44. eotdl-2024.10.7/eotdl/curation/stac/extensions/label/base.py +0 -136
  45. eotdl-2024.10.7/eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -203
  46. eotdl-2024.10.7/eotdl/curation/stac/extensions/label/scaneo.py +0 -219
  47. eotdl-2024.10.7/eotdl/curation/stac/extensions/ml_dataset.py +0 -648
  48. eotdl-2024.10.7/eotdl/curation/stac/extensions/projection.py +0 -44
  49. eotdl-2024.10.7/eotdl/curation/stac/extensions/raster.py +0 -53
  50. eotdl-2024.10.7/eotdl/curation/stac/extensions/sar.py +0 -55
  51. eotdl-2024.10.7/eotdl/curation/stac/extent.py +0 -158
  52. eotdl-2024.10.7/eotdl/curation/stac/parsers.py +0 -61
  53. eotdl-2024.10.7/eotdl/curation/stac/stac.py +0 -343
  54. eotdl-2024.10.7/eotdl/datasets/download.py +0 -104
  55. eotdl-2024.10.7/eotdl/datasets/ingest.py +0 -167
  56. eotdl-2024.10.7/eotdl/files/__init__.py +0 -2
  57. eotdl-2024.10.7/eotdl/files/list_files.py +0 -13
  58. eotdl-2024.10.7/eotdl/models/download.py +0 -101
  59. eotdl-2024.10.7/eotdl/models/ingest.py +0 -165
  60. eotdl-2024.10.7/eotdl/models/metadata.py +0 -43
  61. eotdl-2024.10.7/eotdl/repos/DatasetsAPIRepo.py +0 -80
  62. eotdl-2024.10.7/eotdl/repos/FilesAPIRepo.py +0 -200
  63. eotdl-2024.10.7/eotdl/wrappers/utils.py +0 -35
  64. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/__init__.py +0 -0
  65. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/airbus/__init__.py +0 -0
  66. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/airbus/client.py +0 -0
  67. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/airbus/parameters.py +0 -0
  68. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/airbus/utils.py +0 -0
  69. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/download.py +0 -0
  70. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/sentinelhub/__init__.py +0 -0
  71. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/sentinelhub/client.py +0 -0
  72. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/sentinelhub/evalscripts.py +0 -0
  73. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/access/sentinelhub/utils.py +0 -0
  74. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/auth/__init__.py +0 -0
  75. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/auth/auth.py +0 -0
  76. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/auth/errors.py +0 -0
  77. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/auth/is_logged.py +0 -0
  78. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/auth/logout.py +0 -0
  79. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/commands/__init__.py +0 -0
  80. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/commands/auth.py +0 -0
  81. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/datasets/update.py +0 -0
  82. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/models/retrieve.py +0 -0
  83. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/models/update.py +0 -0
  84. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/repos/AuthAPIRepo.py +0 -0
  85. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/repos/AuthRepo.py +0 -0
  86. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/repos/ModelsAPIRepo.py +0 -0
  87. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/shared/__init__.py +0 -0
  88. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/shared/checksum.py +0 -0
  89. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/tools/__init__.py +0 -0
  90. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/tools/metadata.py +0 -0
  91. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/tools/paths.py +0 -0
  92. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/tools/stac.py +0 -0
  93. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/tools/time_utils.py +0 -0
  94. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/tools/tools.py +0 -0
  95. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/wrappers/__init__.py +0 -0
  96. {eotdl-2024.10.7 → eotdl-2025.3.25}/eotdl/wrappers/models.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: eotdl
3
- Version: 2024.10.7
3
+ Version: 2025.3.25
4
4
  Summary: Earth Observation Training Data Lab
5
5
  License: MIT
6
6
  Author: EarthPulse
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
16
17
  Requires-Dist: black (>=23.10.1,<24.0.0)
17
18
  Requires-Dist: geomet (>=1.0.0,<2.0.0)
18
19
  Requires-Dist: geopandas (>=0.13.2,<0.14.0)
@@ -49,10 +50,10 @@ Description-Content-Type: text/markdown
49
50
 
50
51
  This is the main library and CLI for the **Earth Observation Training Data Lab** (EOTDL), a complete environment that allows you, among other things, to:
51
52
 
52
- - Explore and download Training Datasets (TDS) for Earth Observation (EO) applications.
53
+ - Explore and stage Training Datasets (TDS) for Earth Observation (EO) applications.
53
54
  - Create and upload your own TDS by combining and annotating EO data from different sources.
54
55
  - Train Machine Learning (ML) models using the hosted TDS in the cloud with multi-GPU machines.
55
- - Explore and download pre-trianed ML models for EO applications.
56
+ - Explore and stage pre-trained ML models for EO applications.
56
57
 
57
58
  In our blog you will find tutorials to learn how to leverage the EOTDL to create and use TDS and ML models for your own EO applications.
58
59
 
@@ -15,10 +15,10 @@
15
15
 
16
16
  This is the main library and CLI for the **Earth Observation Training Data Lab** (EOTDL), a complete environment that allows you, among other things, to:
17
17
 
18
- - Explore and download Training Datasets (TDS) for Earth Observation (EO) applications.
18
+ - Explore and stage Training Datasets (TDS) for Earth Observation (EO) applications.
19
19
  - Create and upload your own TDS by combining and annotating EO data from different sources.
20
20
  - Train Machine Learning (ML) models using the hosted TDS in the cloud with multi-GPU machines.
21
- - Explore and download pre-trianed ML models for EO applications.
21
+ - Explore and stage pre-trained ML models for EO applications.
22
22
 
23
23
  In our blog you will find tutorials to learn how to leverage the EOTDL to create and use TDS and ML models for your own EO applications.
24
24
 
@@ -0,0 +1 @@
1
+ __version__ = "2025.03.25"
@@ -19,8 +19,6 @@ def search_sentinel_imagery(
19
19
  evaluate_sentinel_parameters(
20
20
  sensor, time_interval, bounding_box, output_needed=False
21
21
  )
22
-
23
22
  client = SHClient()
24
23
  parameters = SH_PARAMETERS_DICT[sensor]()
25
-
26
24
  return client.search_data(bounding_box, time_interval, parameters)
@@ -42,7 +42,7 @@ class SHS2L1CParameters(SHParameters):
42
42
  "include": ["id", "properties.datetime", "properties.eo:cloud_cover"],
43
43
  "exclude": [],
44
44
  }
45
-
45
+ FILTER = None
46
46
 
47
47
  class SHS1Parameters(SHParameters):
48
48
  """
@@ -1,7 +1,7 @@
1
1
  import typer
2
2
  import os
3
3
 
4
- from .commands import auth, datasets, models
4
+ from .commands import auth, datasets, models, stac
5
5
  from .repos import APIRepo
6
6
  from . import __version__
7
7
 
@@ -10,7 +10,7 @@ app = typer.Typer(help="Welcome to EOTDL. Learn more at https://www.eotdl.com/")
10
10
  app.add_typer(auth.app, name="auth")
11
11
  app.add_typer(datasets.app, name="datasets")
12
12
  app.add_typer(models.app, name="models")
13
-
13
+ app.add_typer(stac.app, name="stac")
14
14
 
15
15
  @app.command()
16
16
  def version():
@@ -4,39 +4,11 @@ from pathlib import Path
4
4
  from ..datasets import (
5
5
  retrieve_datasets,
6
6
  ingest_dataset,
7
- download_dataset,
7
+ stage_dataset,
8
8
  )
9
9
 
10
10
  app = typer.Typer(help="Explore, ingest and download training datasets.")
11
11
 
12
-
13
- @app.command()
14
- def list(
15
- name: str = typer.Option(
16
- None, "--name", "-n", help="Filter the returned datasets by name"
17
- ),
18
- limit: int = typer.Option(
19
- None, "--limit", "-l", help="Limit the number of returned results"
20
- ),
21
- ):
22
- """
23
- Retrieve a list with all the datasets in the EOTDL.
24
-
25
- If using --name, it will filter the results by name. If no name is provided, it will return all the datasets.\n
26
- If using --limit, it will limit the number of results. If no limit is provided, it will return all the datasets.
27
- \n\n
28
- Examples\n
29
- --------\n
30
- $ eotdl datasets list\n
31
- $ eotdl datasets list --name YourModel --limit 5
32
- """
33
- try:
34
- datasets = retrieve_datasets(name, limit)
35
- typer.echo(datasets)
36
- except Exception as e:
37
- typer.echo(e)
38
-
39
-
40
12
  @app.command()
41
13
  def ingest(
42
14
  path: Path = typer.Option(
@@ -62,7 +34,7 @@ def ingest(
62
34
  ),
63
35
  ):
64
36
  """
65
- Ingest a dataset to the EOTDL.
37
+ Ingest a dataset to the EOTDL.
66
38
 
67
39
  This command ingests the dataset to the EOTDL. The dataset must be a folder with the dataset files,
68
40
  and at least a README.md file (and a catalog.json file for Q1+). If these files are missing, the ingestion
@@ -90,7 +62,32 @@ def ingest(
90
62
  ingest_dataset(path, verbose, typer.echo, foce_metadata_update, sync_metadata)
91
63
  except Exception as e:
92
64
  typer.echo(e)
65
+
66
+ @app.command()
67
+ def list(
68
+ name: str = typer.Option(
69
+ None, "--name", "-n", help="Filter the returned datasets by name"
70
+ ),
71
+ limit: int = typer.Option(
72
+ None, "--limit", "-l", help="Limit the number of returned results"
73
+ ),
74
+ ):
75
+ """
76
+ Retrieve a list with all the datasets in the EOTDL.
93
77
 
78
+ If using --name, it will filter the results by name. If no name is provided, it will return all the datasets.\n
79
+ If using --limit, it will limit the number of results. If no limit is provided, it will return all the datasets.
80
+ \n\n
81
+ Examples\n
82
+ --------\n
83
+ $ eotdl datasets list\n
84
+ $ eotdl datasets list --name YourModel --limit 5
85
+ """
86
+ try:
87
+ datasets = retrieve_datasets(name, limit)
88
+ typer.echo(datasets)
89
+ except Exception as e:
90
+ typer.echo(e)
94
91
 
95
92
  @app.command()
96
93
  def get(
@@ -130,7 +127,7 @@ def get(
130
127
  $ eotdl dataset get YourDataset --path /path/to/download --file dataset.zip --version 1 --assets True --force True --verbose True
131
128
  """
132
129
  try:
133
- dst_path = download_dataset(
130
+ dst_path = stage_dataset(
134
131
  dataset,
135
132
  version,
136
133
  path,
@@ -4,39 +4,11 @@ from pathlib import Path
4
4
  from ..models import (
5
5
  retrieve_models,
6
6
  ingest_model,
7
- download_model,
7
+ stage_model,
8
8
  )
9
9
 
10
10
  app = typer.Typer(help="Explore, ingest and download ML models.")
11
11
 
12
-
13
- @app.command()
14
- def list(
15
- name: str = typer.Option(
16
- None, "--name", "-n", help="Filter the returned models by name"
17
- ),
18
- limit: int = typer.Option(
19
- None, "--limit", "-l", help="Limit the number of returned results"
20
- ),
21
- ):
22
- """
23
- Retrieve a list with all the models in the EOTDL.
24
-
25
- If using --name, it will filter the results by name. If no name is provided, it will return all the models.\n
26
- If using --limit, it will limit the number of results. If no limit is provided, it will return all the models.
27
- \n\n
28
- Examples\n
29
- --------\n
30
- $ eotdl models list\n
31
- $ eotdl models list --name YourModel --limit 5
32
- """
33
- try:
34
- models = retrieve_models(name, limit)
35
- typer.echo(models)
36
- except Exception as e:
37
- typer.echo(e)
38
-
39
-
40
12
  @app.command()
41
13
  def ingest(
42
14
  path: Path = typer.Option(..., "--path", "-p", help="Path to the model to ingest"),
@@ -88,6 +60,31 @@ def ingest(
88
60
  except Exception as e:
89
61
  typer.echo(e)
90
62
 
63
+ @app.command()
64
+ def list(
65
+ name: str = typer.Option(
66
+ None, "--name", "-n", help="Filter the returned models by name"
67
+ ),
68
+ limit: int = typer.Option(
69
+ None, "--limit", "-l", help="Limit the number of returned results"
70
+ ),
71
+ ):
72
+ """
73
+ Retrieve a list with all the models in the EOTDL.
74
+
75
+ If using --name, it will filter the results by name. If no name is provided, it will return all the models.\n
76
+ If using --limit, it will limit the number of results. If no limit is provided, it will return all the models.
77
+ \n\n
78
+ Examples\n
79
+ --------\n
80
+ $ eotdl models list\n
81
+ $ eotdl models list --name YourModel --limit 5
82
+ """
83
+ try:
84
+ models = retrieve_models(name, limit)
85
+ typer.echo(models)
86
+ except Exception as e:
87
+ typer.echo(e)
91
88
 
92
89
  @app.command()
93
90
  def get(
@@ -127,7 +124,7 @@ def get(
127
124
  $ eotdl models get YourModel --path /path/to/download --file model.zip --version 1 --assets True --force True --verbose True
128
125
  """
129
126
  try:
130
- dst_path = download_model(
127
+ dst_path = stage_model(
131
128
  model, version, path, typer.echo, assets, force, verbose
132
129
  )
133
130
  typer.echo(f"Data available at {dst_path}")
@@ -0,0 +1,57 @@
1
+ import typer
2
+ from typing import Optional
3
+
4
+ from ..curation.stac.api import api_status, search_stac_columns, retrieve_stac_collections, retrieve_stac_collection, retrieve_stac_items, retrieve_stac_item, search_stac_items
5
+
6
+ app = typer.Typer(help="EOTDL STAC API")
7
+
@app.command()
def status():
    # Echo whatever `api_status()` returns. On any exception the error is
    # echoed instead and the command is aborted.
    try:
        typer.echo(api_status())
    except Exception as err:
        typer.echo(err)
        raise typer.Abort()
16
+
@app.command()
def collections():
    # Echo whatever `retrieve_stac_collections()` returns. On any exception
    # the error is echoed instead and the command is aborted.
    try:
        typer.echo(retrieve_stac_collections())
    except Exception as err:
        typer.echo(err)
        raise typer.Abort()
25
+
@app.command()
def collection(collection_id: str):
    # Echo whatever `retrieve_stac_collection()` returns for `collection_id`.
    # On any exception the error is echoed instead and the command is aborted.
    try:
        typer.echo(retrieve_stac_collection(collection_id))
    except Exception as err:
        typer.echo(err)
        raise typer.Abort()
34
+
@app.command()
def items(collection_id: str):
    # Echo whatever `retrieve_stac_items()` returns for `collection_id`.
    try:
        data = retrieve_stac_items(collection_id)
        typer.echo(data)
    except Exception as e:
        typer.echo(e)
        # Abort like the `status`, `collections` and `collection` commands in
        # this module, so a failure does not end with a success exit code.
        raise typer.Abort()
42
+
@app.command()
def item(collection_id: str, item_id: str):
    # Echo whatever `retrieve_stac_item()` returns for the given collection
    # and item identifiers.
    try:
        data = retrieve_stac_item(collection_id, item_id)
        typer.echo(data)
    except Exception as e:
        typer.echo(e)
        # Abort like the `status`, `collections` and `collection` commands in
        # this module, so a failure does not end with a success exit code.
        raise typer.Abort()
50
+
@app.command()
def search(collection_id: str, query: Optional[str] = None):
    # Echo whatever `search_stac_items()` returns; `query` is forwarded as-is
    # (None when the option is not given).
    try:
        data = search_stac_items(collection_id, query)
        typer.echo(data)
    except Exception as e:
        typer.echo(e)
        # Abort like the `status`, `collections` and `collection` commands in
        # this module, so a failure does not end with a success exit code.
        raise typer.Abort()
File without changes
@@ -0,0 +1 @@
1
+ from .stac import create_stac_catalog
@@ -0,0 +1,58 @@
1
+ import json
2
+
3
+ from ...repos import STACAPIRepo
4
+
def api_status():
    """Return the data reported by ``STACAPIRepo.status()``.

    Raises:
        Exception: Wrapping the repo's error value when one is reported.
    """
    result, failure = STACAPIRepo().status()
    if failure:
        raise Exception(failure)
    return result
11
+
def retrieve_stac_collections():
    """Return the data reported by ``STACAPIRepo.collections()``.

    Raises:
        Exception: Wrapping the repo's error value when one is reported.
    """
    result, failure = STACAPIRepo().collections()
    if failure:
        raise Exception(failure)
    return result
18
+
def retrieve_stac_collection(collection_id):
    """Return the data reported by ``STACAPIRepo.collection(collection_id)``.

    Raises:
        Exception: Wrapping the repo's error value when one is reported.
    """
    result, failure = STACAPIRepo().collection(collection_id)
    if failure:
        raise Exception(failure)
    return result
25
+
def retrieve_stac_items(collection_id):
    """Return the data reported by ``STACAPIRepo.items(collection_id)``.

    Raises:
        Exception: Wrapping the repo's error value when one is reported.
    """
    result, failure = STACAPIRepo().items(collection_id)
    if failure:
        raise Exception(failure)
    return result
32
+
def retrieve_stac_item(collection_id, item_id):
    """Return the data reported by ``STACAPIRepo.item(collection_id, item_id)``.

    Raises:
        Exception: Wrapping the repo's error value when one is reported.
    """
    result, failure = STACAPIRepo().item(collection_id, item_id)
    if failure:
        raise Exception(failure)
    return result
39
+
def search_stac_items(collection_id, query=None):
    """Search a collection, or report its search columns when no query is given.

    With ``query`` left as ``None`` the result of
    ``STACAPIRepo.search_columns(collection_id)`` is returned unchanged.
    Otherwise ``STACAPIRepo.search(collection_id, str(query))`` is called and
    its data is decoded with ``json.loads`` before being returned.

    Raises:
        Exception: Wrapping the repo's error value when one is reported.
    """
    repo = STACAPIRepo()
    columns_only = query is None
    if columns_only:
        outcome, failure = repo.search_columns(collection_id)
    else:
        outcome, failure = repo.search(collection_id, str(query))
    if failure:
        raise Exception(failure)
    return outcome if columns_only else json.loads(outcome)
51
+
52
+
def search_stac_columns(collection_id):
    """Return the data reported by ``STACAPIRepo.search_columns(collection_id)``.

    Raises:
        Exception: Wrapping the repo's error value when one is reported.
    """
    result, failure = STACAPIRepo().search_columns(collection_id)
    if failure:
        raise Exception(failure)
    return result
@@ -0,0 +1,33 @@
1
+ import pyarrow.parquet as pq
2
+ import stac_geoparquet
3
+ import json
4
+ from tqdm import tqdm
5
+ import pystac
6
+ from datetime import datetime
7
+
def create_stac_catalog(parquet_catalog_path, stac_catalog=None):
    """Turn the rows of a parquet STAC catalog into validated ``pystac`` items.

    The parquet file at ``parquet_catalog_path`` is read with pyarrow, each
    entry produced by ``stac_geoparquet.arrow.stac_table_to_items`` is converted
    with ``pystac.Item.from_dict`` and validated.

    Args:
        parquet_catalog_path: Path handed to ``pyarrow.parquet.read_table``.
        stac_catalog: Optional object exposing ``add_item``; when given, every
            item is added to it instead of being collected in a list.

    Returns:
        ``stac_catalog`` when one was passed, otherwise the list of items.
    """
    table = pq.read_table(parquet_catalog_path)
    collected = []
    raw_items = stac_geoparquet.arrow.stac_table_to_items(table)
    for raw in tqdm(raw_items, total=len(table)):
        stac_item = pystac.Item.from_dict(raw)
        stac_item.validate()
        if stac_catalog is None:
            collected.append(stac_item)
        else:
            stac_catalog.add_item(stac_item)
    return collected if stac_catalog is None else stac_catalog
@@ -1,3 +1,3 @@
1
1
  from .retrieve import retrieve_datasets, retrieve_dataset, retrieve_dataset_files
2
2
  from .ingest import ingest_dataset
3
- from .download import download_dataset, download_file_url
3
+ from .stage import stage_dataset, stage_dataset_file
@@ -0,0 +1,36 @@
1
+ from pathlib import Path
2
+
3
+ from ..repos import DatasetsAPIRepo
4
+ from ..files.ingest import prep_ingest_stac, prep_ingest_folder, ingest
5
+
def retrieve_dataset(metadata, user):
    """Fetch the dataset named in ``metadata``, creating it when it is missing.

    Args:
        metadata: Object exposing ``name`` and ``dict()``; its ``dict()`` output
            is passed to ``DatasetsAPIRepo.create_dataset``.
        user: Mapping with a ``"uid"`` key, compared against the dataset's
            ``"uid"`` and forwarded to ``create_dataset``.

    Returns:
        The dataset data returned by the repo (retrieved or newly created).

    Raises:
        Exception: If the dataset exists with a different ``uid`` than the
            user's, or if the repo reports an error that was not resolved by
            creating the dataset.
    """
    repo = DatasetsAPIRepo()
    data, error = repo.retrieve_dataset(metadata.name)
    if data and data["uid"] != user["uid"]:
        raise Exception("Dataset already exists.")
    if error == "Dataset doesn't exist":
        # Not found: create it; `error` now reflects the creation attempt.
        data, error = repo.create_dataset(metadata.dict(), user)
    # Raise on ANY remaining error (lookup or creation) so a failed lookup is
    # never returned to the caller as missing data.
    if error:
        raise Exception(error)
    return data
19
+
def ingest_dataset(
    path,
    verbose=False,
    logger=print,
    force_metadata_update=False,
    sync_metadata=False,
):
    """Prepare a local folder and ingest it as a dataset.

    A folder that directly contains a ``catalog.json`` entry is prepared with
    ``prep_ingest_stac``; any other folder is prepared with
    ``prep_ingest_folder``. The folder is then handed to ``ingest``.

    Args:
        path: Folder to ingest (anything accepted by ``pathlib.Path``).
        verbose: Forwarded to ``prep_ingest_folder``.
        logger: Callable forwarded to the preparation step.
        force_metadata_update: Forwarded to ``prep_ingest_folder``.
        sync_metadata: Forwarded to ``prep_ingest_folder``.

    Returns:
        Whatever ``ingest`` returns.

    Raises:
        Exception: If ``path`` is not a directory.
    """
    folder = Path(path)
    if not folder.is_dir():
        raise Exception("Path must be a folder")
    has_catalog = any(entry.name == "catalog.json" for entry in folder.iterdir())
    if has_catalog:
        prep_ingest_stac(folder, logger)
    else:
        prep_ingest_folder(
            folder, verbose, logger, force_metadata_update, sync_metadata
        )
    return ingest(folder, DatasetsAPIRepo(), retrieve_dataset, 'datasets')
35
+
36
+
@@ -25,12 +25,3 @@ def retrieve_dataset_files(dataset_id, version):
25
25
  raise Exception(error)
26
26
  return data
27
27
 
28
-
29
- # def list_datasets(pattern=None):
30
- # datasets = retrieve_datasets()
31
- # if pattern:
32
- # regex = re.compile(rf".*{re.escape(pattern)}.*", re.IGNORECASE)
33
- # names = list(datasets.keys())
34
- # valid = [name for name in names if regex.search(name)]
35
- # return {name: datasets[name] for name in valid}
36
- # return datasets
@@ -0,0 +1,64 @@
1
+ import os
2
+ from pathlib import Path
3
+ from tqdm import tqdm
4
+ import geopandas as gpd
5
+
6
+ from ..auth import with_auth
7
+ from .retrieve import retrieve_dataset
8
+ from ..repos import FilesAPIRepo
9
+
@with_auth
def stage_dataset(
    dataset_name,
    version=None,
    path=None,
    logger=print,
    assets=False,
    force=False,
    verbose=False,
    user=None,
    file=None,
):
    """Stage the catalog of a dataset version locally and, optionally, its assets.

    Args:
        dataset_name: Name passed to ``retrieve_dataset``; also used as the
            name of the destination sub-folder.
        version: ``version_id`` to stage. Defaults to the highest
            ``version_id`` listed in the dataset's ``versions``.
        path: Base folder (``str`` or ``os.PathLike``). Defaults to the
            ``EOTDL_DOWNLOAD_PATH`` environment variable, or
            ``~/.cache/eotdl/datasets`` when it is unset.
        logger: Not used by this function; kept for interface compatibility.
        assets: When true, the ``href`` of every asset in the staged catalog
            is staged with ``stage_dataset_file``.
        force: When true, an already existing destination is not an error.
        verbose: Not used by this function; kept for interface compatibility.
        user: Forwarded to ``FilesAPIRepo.stage_file``. Presumably injected by
            ``with_auth`` -- confirm against the decorator.
        file: Not used by this function; kept for interface compatibility.

    Returns:
        The destination folder, as a string.

    Raises:
        AssertionError: If ``version`` is not one of the dataset's versions.
        Exception: If the destination already exists and ``force`` is false.
    """
    dataset = retrieve_dataset(dataset_name)
    version_ids = [v["version_id"] for v in dataset["versions"]]
    if version is None:
        version = sorted(version_ids)[-1]
    elif version not in version_ids:
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        raise AssertionError(f"Version {version} not found")

    if path is None:
        base_path = os.getenv(
            "EOTDL_DOWNLOAD_PATH", str(Path.home()) + "/.cache/eotdl/datasets"
        )
    else:
        # os.fspath lets callers pass a pathlib.Path as well as a str.
        base_path = os.fspath(path)
    download_path = base_path + "/" + dataset_name

    # Refuse to overwrite an existing destination unless explicitly forced.
    if os.path.exists(download_path) and not force:
        raise Exception(
            f"Dataset `{dataset['name']}` already exists at {download_path}. To force download, use force=True or -f in the CLI."
        )

    # stage metadata
    repo = FilesAPIRepo()
    catalog_path = repo.stage_file(
        dataset["id"], f"catalog.v{version}.parquet", user, download_path
    )

    # TODO: stage README.md

    if assets:
        gdf = gpd.read_parquet(catalog_path)
        for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Staging assets"):
            for asset in row["assets"].values():
                stage_dataset_file(asset["href"], download_path)

    return download_path
59
+
60
+
@with_auth
def stage_dataset_file(file_url, path, user):
    """Stage a single file by URL through ``FilesAPIRepo.stage_file_url``.

    Args:
        file_url: URL forwarded to the repo.
        path: Destination forwarded to the repo.
        user: Forwarded to the repo. Presumably injected by ``with_auth`` --
            confirm against the decorator.

    Returns:
        Whatever ``FilesAPIRepo.stage_file_url`` returns.
    """
    return FilesAPIRepo().stage_file_url(file_url, path, user)
File without changes