eotdl-2024.2.15.tar.gz → eotdl-2024.3.14.post2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/PKG-INFO +4 -1
  2. eotdl-2024.3.14.post2/eotdl/__init__.py +1 -0
  3. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/cli.py +13 -0
  4. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/commands/datasets.py +26 -6
  5. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/commands/models.py +14 -1
  6. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/datasets/download.py +7 -2
  7. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/datasets/ingest.py +69 -10
  8. eotdl-2024.3.14.post2/eotdl/datasets/metadata.py +44 -0
  9. eotdl-2024.3.14.post2/eotdl/datasets/update.py +17 -0
  10. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/files/ingest.py +8 -3
  11. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/models/download.py +7 -3
  12. eotdl-2024.3.14.post2/eotdl/models/ingest.py +103 -0
  13. eotdl-2024.3.14.post2/eotdl/models/metadata.py +44 -0
  14. eotdl-2024.3.14.post2/eotdl/models/update.py +17 -0
  15. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/APIRepo.py +5 -0
  16. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/DatasetsAPIRepo.py +16 -0
  17. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/ModelsAPIRepo.py +16 -0
  18. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/pyproject.toml +4 -1
  19. eotdl-2024.2.15/eotdl/__init__.py +0 -1
  20. eotdl-2024.2.15/eotdl/datasets/metadata.py +0 -16
  21. eotdl-2024.2.15/eotdl/models/ingest.py +0 -47
  22. eotdl-2024.2.15/eotdl/models/metadata.py +0 -16
  23. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/README.md +0 -0
  24. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/__init__.py +0 -0
  25. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/airbus/__init__.py +0 -0
  26. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/airbus/client.py +0 -0
  27. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/airbus/parameters.py +0 -0
  28. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/airbus/utils.py +0 -0
  29. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/download.py +0 -0
  30. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/search.py +0 -0
  31. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/sentinelhub/__init__.py +0 -0
  32. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/sentinelhub/client.py +0 -0
  33. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/sentinelhub/evalscripts.py +0 -0
  34. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/sentinelhub/parameters.py +0 -0
  35. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/access/sentinelhub/utils.py +0 -0
  36. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/auth/__init__.py +0 -0
  37. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/auth/auth.py +0 -0
  38. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/auth/errors.py +0 -0
  39. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/auth/is_logged.py +0 -0
  40. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/auth/logout.py +0 -0
  41. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/commands/__init__.py +0 -0
  42. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/commands/auth.py +0 -0
  43. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/__init__.py +0 -0
  44. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/__init__.py +0 -0
  45. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/assets.py +0 -0
  46. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/dataframe.py +0 -0
  47. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/dataframe_bck.py +0 -0
  48. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/dataframe_labeling.py +0 -0
  49. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/__init__.py +0 -0
  50. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/base.py +0 -0
  51. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/dem.py +0 -0
  52. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/eo.py +0 -0
  53. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/label/__init__.py +0 -0
  54. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/label/base.py +0 -0
  55. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/label/image_name_labeler.py +0 -0
  56. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/label/scaneo.py +0 -0
  57. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/ml_dataset.py +0 -0
  58. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/projection.py +0 -0
  59. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/raster.py +0 -0
  60. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extensions/sar.py +0 -0
  61. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/extent.py +0 -0
  62. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/parsers.py +0 -0
  63. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/curation/stac/stac.py +0 -0
  64. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/datasets/__init__.py +0 -0
  65. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/datasets/retrieve.py +0 -0
  66. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/files/__init__.py +0 -0
  67. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/models/__init__.py +0 -0
  68. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/models/retrieve.py +0 -0
  69. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/AuthAPIRepo.py +0 -0
  70. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/AuthRepo.py +0 -0
  71. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/FilesAPIRepo.py +0 -0
  72. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/__init__.py +0 -0
  73. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/shared/__init__.py +0 -0
  74. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/shared/checksum.py +0 -0
  75. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/tools/__init__.py +0 -0
  76. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/tools/geo_utils.py +0 -0
  77. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/tools/metadata.py +0 -0
  78. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/tools/paths.py +0 -0
  79. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/tools/stac.py +0 -0
  80. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/tools/time_utils.py +0 -0
  81. {eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/tools/tools.py +0 -0
{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: eotdl
-Version: 2024.2.15
+Version: 2024.3.14.post2
 Summary: Earth Observation Training Data Lab
 License: MIT
 Author: EarthPulse
@@ -15,10 +15,13 @@ Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: black (>=23.10.1,<24.0.0)
 Requires-Dist: geomet (>=1.0.0,<2.0.0)
 Requires-Dist: geopandas (>=0.13.2,<0.14.0)
+Requires-Dist: markdown (>=3.5.2,<4.0.0)
+Requires-Dist: markdownify (>=0.11.6,<0.12.0)
 Requires-Dist: mypy (>=1.6.1,<2.0.0)
 Requires-Dist: pydantic (>=1.10.6,<2.0.0)
 Requires-Dist: pyjwt (>=2.6.0,<3.0.0)
 Requires-Dist: pystac[validation] (==1.8.2)
+Requires-Dist: python-frontmatter (>=1.1.0,<2.0.0)
 Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
 Requires-Dist: rasterio (>=1.3.9,<2.0.0)
 Requires-Dist: requests (>=2.28.2,<3.0.0)

eotdl-2024.3.14.post2/eotdl/__init__.py
@@ -0,0 +1 @@
+__version__ = "2024.03.14-2"

{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/cli.py
@@ -1,5 +1,8 @@
 import typer
+import os
+
 from .commands import auth, datasets, models
+from .repos import APIRepo
 from . import __version__

 app = typer.Typer(help="Welcome to EOTDL. Learn more at https://www.eotdl.com/")
@@ -17,5 +20,15 @@ def version():
    typer.echo(f"EOTDL Version: {__version__}")


+@app.command()
+def api():
+    """
+    Get EOTDL API URL and info.
+    """
+    repo = APIRepo()
+    typer.echo(f"EOTDL API URL: {repo.url}")
+    typer.echo(repo.get_info())
+
+
 if __name__ == "__main__":
     app()

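The new api command prints the endpoint the client is configured against, plus whatever the API root returns through get_info() (added to APIRepo further down in this diff). A hypothetical session, assuming the default public endpoint (the URL shown is an assumption, not confirmed by this diff):

    $ eotdl api
    EOTDL API URL: https://api.eotdl.com/
    <JSON info returned by the API root endpoint>
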
{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/commands/datasets.py
@@ -45,14 +45,27 @@ def ingest(
     verbose: bool = typer.Option(
         False,
         "--verbose",
+        "-v",
         help="Verbose output. This will print the progress of the ingestion",
     ),
+    foce_metadata_update: bool = typer.Option(
+        False,
+        "--force-metadata-update",
+        "-f",
+        help="Force metadata update even if it already exists. Will overwrite the current metadata in EOTDL",
+    ),
+    sync_metadata: bool = typer.Option(
+        False,
+        "--sync-metadata",
+        "-s",
+        help="Sync local metadata with the EOTDL. Will overwrite the local metadata",
+    ),
 ):
     """
     Ingest a dataset to the EOTDL.

     This command ingests the dataset to the EOTDL. The dataset must be a folder with the dataset files,
-    and at least a metadata.yml file or a catalog.json file. If there are not these files, the ingestion
+    and at least a README.md file (and a catalog.json file for Q1+). If these files are missing, the ingestion
     will not work. All the files in the folder will be uploaded to the EOTDL.
     \n\n
     The following constraints apply to the dataset name:\n
@@ -60,12 +73,13 @@ def ingest(
     - It must be between 3 and 45 characters long\n
     - It can only contain alphanumeric characters and dashes.\n
     \n
-    The metadata.yml file must contain the following fields:\n
+    The README.md file must contain the following fields in the metadata header:\n
     - name: the name of the dataset\n
     - authors: the author or authors of the dataset\n
     - license: the license of the dataset\n
     - source: the source of the dataset\n
-    \n
+    - thumbnail: an image to use as the thumbnail of the dataset in the website\n
+    The rest of the content in the README.md file will be used as the description of the dataset in the website.
     If using --verbose, it will print the progress of the ingestion.
     \n\n
     Examples\n
@@ -73,7 +87,7 @@ def ingest(
     $ eotdl dataset ingest --path /path/to/folder-with-dataset --verbose True
     """
     try:
-        ingest_dataset(path, verbose, typer.echo)
+        ingest_dataset(path, verbose, typer.echo, foce_metadata_update, sync_metadata)
     except Exception as e:
         typer.echo(e)

@@ -108,7 +122,7 @@ def get(
     If using --version, it will download the specified version. If no version is provided, it will download the latest version.\n
     If using --assets when the dataset is STAC, it will also download the STAC assets of the dataset. If not provided, it will only download the STAC metadata.\n
     If using --force, it will download the dataset even if the file already exists.\n
-    If using --verbose, it will print the progress of the download.
+    If using --verbose, it will print the progress of the download.\n
     \n\n
     Examples\n
     --------\n
@@ -117,7 +131,13 @@ def get(
     """
     try:
         dst_path = download_dataset(
-            dataset, version, path, typer.echo, assets, force, verbose
+            dataset,
+            version,
+            path,
+            typer.echo,
+            assets,
+            force,
+            verbose,
         )
         typer.echo(f"Data available at {dst_path}")
     except Exception as e:

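For reference, a minimal README.md satisfying the metadata header that the ingest command above describes might look like this (all names and URLs are illustrative, not taken from the package):

    ---
    name: my-dataset
    authors:
      - Jane Doe
    license: MIT
    source: https://www.example.com/my-dataset
    thumbnail: https://www.example.com/thumbnail.png
    ---

    Everything after the header is free text and becomes the description shown on the website.
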
{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/commands/models.py
@@ -45,6 +45,18 @@ def ingest(
         "--verbose",
         help="Verbose output. This will print the progress of the ingestion",
     ),
+    foce_metadata_update: bool = typer.Option(
+        False,
+        "--force-metadata-update",
+        "-f",
+        help="Force metadata update even if it already exists. Will overwrite the current metadata in EOTDL",
+    ),
+    sync_metadata: bool = typer.Option(
+        False,
+        "--sync-metadata",
+        "-s",
+        help="Sync local metadata with the EOTDL. Will overwrite the local metadata",
+    ),
 ):
     """
     Ingest a model to the EOTDL.
@@ -63,6 +75,7 @@ def ingest(
     - authors: the author or authors of the model\n
     - license: the license of the model\n
     - source: the source of the model\n
+    - thumbnail: an image to use as the thumbnail of the dataset in the website\n
     \n
     If using --verbose, it will print the progress of the ingestion.
     \n\n
@@ -71,7 +84,7 @@ def ingest(
     $ eotdl models ingest --path /path/to/folder-with-model --verbose True
     """
     try:
-        ingest_model(path, verbose, typer.echo)
+        ingest_model(path, verbose, typer.echo, foce_metadata_update, sync_metadata)
     except Exception as e:
         typer.echo(e)

{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/datasets/download.py
@@ -6,6 +6,7 @@ from ..auth import with_auth
 from .retrieve import retrieve_dataset, retrieve_dataset_files
 from ..repos import FilesAPIRepo, DatasetsAPIRepo
 from ..curation.stac import STACDataFrame
+from .metadata import generate_metadata


 @with_auth
@@ -61,8 +62,7 @@ def download_dataset(
             )
             # if calculate_checksum(dst_path) != checksum:
             # logger(f"Checksum for {file} does not match")
-        if verbose:
-            logger("Done")
+
     else:
         # raise NotImplementedError("Downloading a STAC dataset is not implemented")
         if verbose:
@@ -94,6 +94,11 @@ def download_dataset(
         else:
             if verbose:
                 logger("To download assets, set assets=True or -a in the CLI.")
+    if verbose:
+        logger("Generating README.md ...")
+    generate_metadata(download_path, dataset)
+    if verbose:
+        logger("Done")
     return download_path


{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/datasets/ingest.py
@@ -2,6 +2,8 @@ from pathlib import Path
 import yaml
 from tqdm import tqdm
 import json
+import frontmatter
+import markdown

 from ..auth import with_auth
 from .metadata import Metadata
@@ -9,15 +11,23 @@ from ..repos import DatasetsAPIRepo, FilesAPIRepo
 from ..files import ingest_files, create_new_version
 from ..curation.stac import STACDataFrame
 from ..shared import calculate_checksum
+from .update import update_dataset
+from .metadata import generate_metadata


-def ingest_dataset(path, verbose=False, logger=print):
+def ingest_dataset(
+    path,
+    verbose=False,
+    logger=print,
+    force_metadata_update=False,
+    sync_metadata=False,
+):
     path = Path(path)
     if not path.is_dir():
         raise Exception("Path must be a folder")
     if "catalog.json" in [f.name for f in path.iterdir()]:
         return ingest_stac(path / "catalog.json", logger)
-    return ingest_folder(path, verbose, logger)
+    return ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)


 def retrieve_dataset(metadata, user):
@@ -33,23 +43,72 @@ def retrieve_dataset(metadata, user):
     if error:
         raise Exception(error)
     data["id"] = data["dataset_id"]
-    return data["id"]
+    return data


 @with_auth
-def ingest_folder(folder, verbose=False, logger=print, user=None):
+def ingest_folder(
+    folder,
+    verbose=False,
+    logger=print,
+    force_metadata_update=False,
+    sync_metadata=False,
+    user=None,
+):
     repo = DatasetsAPIRepo()
-    # load metadata
-    metadata = yaml.safe_load(open(folder.joinpath("metadata.yml"), "r").read()) or {}
-    metadata = Metadata(**metadata)
+    try:
+        readme = frontmatter.load(folder.joinpath("README.md"))
+        metadata, content = readme.metadata, readme.content
+        metadata = Metadata(**metadata)
+    except FileNotFoundError:
+        # load metadata (legacy)
+        metadata = (
+            yaml.safe_load(open(folder.joinpath("metadata.yml"), "r").read()) or {}
+        )
+        metadata = Metadata(**metadata)
+        content = None
+    except Exception as e:
+        raise Exception("Error loading metadata: " + str(e))
     # retrieve dataset (create if doesn't exist)
-    dataset_id = retrieve_dataset(metadata, user)
-    # ingest files
+    dataset = retrieve_dataset(metadata, user)
+    if content:
+        content = markdown.markdown(content)
+    update_metadata = True
+    if "description" in dataset:
+        # do not do this if the dataset is new, only if it already exists
+        update_metadata = check_metadata(
+            dataset, metadata, content, force_metadata_update, sync_metadata, folder
+        )
+    if update_metadata:
+        update_dataset(dataset["id"], metadata, content, user)
     return ingest_files(
-        repo, dataset_id, folder, verbose, logger, user, endpoint="datasets"
+        repo, dataset["id"], folder, verbose, logger, user, endpoint="datasets"
     )


+def check_metadata(
+    dataset, metadata, content, force_metadata_update, sync_metadata, folder
+):
+    if (
+        dataset["name"] != metadata.name
+        or dataset["description"] != content
+        or dataset["authors"] != metadata.authors
+        or dataset["source"] != metadata.source
+        or dataset["license"] != metadata.license
+        or dataset["thumbnail"] != metadata.thumbnail
+    ):
+        if not force_metadata_update and not sync_metadata:
+            raise Exception(
+                "The provided metadata is not consistent with the current metadata. Use -f to force metadata update or -s to sync your local metadata."
+            )
+        if force_metadata_update:
+            return True
+        if sync_metadata:
+            generate_metadata(str(folder), dataset)
+            return False
+    return False
+
+
 def retrieve_stac_dataset(dataset_name, user):
     repo = DatasetsAPIRepo()
     data, error = repo.retrieve_dataset(dataset_name)

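The flag handling in check_metadata above is compact enough to misread, so here is a simplified, self-contained restatement of the decision (a sketch, not the package's code; `differs` stands in for the six-field comparison):

    def resolve_metadata_conflict(differs: bool, force: bool, sync: bool) -> bool:
        """Return True when the metadata stored in EOTDL should be overwritten."""
        if not differs:
            return False  # local and remote metadata agree: nothing to do
        if not force and not sync:
            # mirrors the exception raised when neither -f nor -s is passed
            raise Exception(
                "The provided metadata is not consistent with the current metadata."
            )
        if force:
            return True  # -f wins: push local metadata to EOTDL
        # -s: regenerate the local README.md from EOTDL, leave the remote as is
        return False

    assert resolve_metadata_conflict(differs=False, force=True, sync=False) is False
    assert resolve_metadata_conflict(differs=True, force=True, sync=True) is True
    assert resolve_metadata_conflict(differs=True, force=False, sync=True) is False
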
eotdl-2024.3.14.post2/eotdl/datasets/metadata.py
@@ -0,0 +1,44 @@
+from pydantic import BaseModel, validator
+from typing import List, Optional
+import markdownify
+from pathlib import Path
+
+
+class Metadata(BaseModel):
+    authors: List[str]
+    license: str
+    source: str
+    name: str
+    thumbnail: Optional[str] = ""
+
+    # validate source is a URL
+    @validator("source")
+    def source_is_url(cls, v):
+        if not v.startswith("http") and not v.startswith("https"):
+            raise ValueError("source must be a URL")
+        return v
+
+    # validate thumbnail is a url
+    @validator("thumbnail")
+    def thumbnail_is_url(cls, v):
+        if not v.startswith("http") and not v.startswith("https"):
+            raise ValueError("thumbnail must be a URL")
+        return v
+
+
+def generate_metadata(download_path, dataset):
+    with open(download_path + "/README.md", "w") as f:
+        f.write("---\n")
+        f.write(f"name: {dataset['name']}\n")
+        f.write(f"license: {dataset['license']}\n")
+        f.write(f"source: {dataset['source']}\n")
+        f.write(f"thumbnail: {dataset['thumbnail']}\n")
+        f.write(f"authors:\n")
+        for author in dataset["authors"]:
+            f.write(f"  - {author}\n")
+        f.write("---\n")
+        f.write(markdownify.markdownify(dataset["description"], heading_style="ATX"))
+    # remove metadata.yml if exists
+    if Path(download_path + "/metadata.yml").exists():
+        Path(download_path + "/metadata.yml").unlink()
+    return download_path + "/README.md"

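A quick sanity check of the new Metadata model (a sketch; the field values are invented). Under the pinned pydantic v1, validators do not run on defaulted fields, so the empty-string thumbnail default passes untouched:

    from pydantic import ValidationError

    from eotdl.datasets.metadata import Metadata

    fields = {
        "name": "my-dataset",
        "authors": ["Jane Doe"],
        "license": "MIT",
        "source": "https://www.example.com/my-dataset",
    }
    print(repr(Metadata(**fields).thumbnail))  # '' -- default skips the URL validator

    try:
        Metadata(**{**fields, "source": "not-a-url"})
    except ValidationError as err:
        print(err)  # source must be a URL
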
eotdl-2024.3.14.post2/eotdl/datasets/update.py
@@ -0,0 +1,17 @@
+from ..repos import DatasetsAPIRepo
+
+
+def update_dataset(dataset_id, metadata, content, user):
+    repo = DatasetsAPIRepo()
+    data, error = repo.update_dataset(
+        dataset_id,
+        metadata.authors,
+        metadata.source,
+        metadata.license,
+        metadata.thumbnail,
+        content,
+        user,
+    )
+    if error:
+        raise Exception(error)
+    return data

{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/files/ingest.py
@@ -13,8 +13,13 @@ from ..shared import calculate_checksum
 def retrieve_files(folder):
     # get all files in directory recursively
     items = [Path(item) for item in glob(str(folder) + "/**/*", recursive=True)]
-    if not any(item.name == "metadata.yml" for item in items):
-        raise Exception("metadata.yml not found in directory")
+    if not any(item.name == "metadata.yml" for item in items) and not any(
+        item.name == "README.md" for item in items
+    ):
+        raise Exception("README.md not found in directory")
+    # remove metadata files
+    items = [item for item in items if item.name != "metadata.yml"]
+    items = [item for item in items if item.name != "README.md"]
     # remove directories
     items = [item for item in items if not item.is_dir()]
     if len(items) == 0:
@@ -129,7 +134,7 @@ def ingest_files(repo, dataset_or_model_id, folder, verbose, logger, user, endpo
             parts,
             endpoint,
         )
-        files_repo.complete_upload(user, upload_id, version, endpoint)
+        data, error = files_repo.complete_upload(user, upload_id, version, endpoint)
     # ingest new small files in batches
     if len(upload_files) > 0:
         logger("generating batches...")

{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/models/download.py
@@ -6,6 +6,7 @@ from ..auth import with_auth
 from .retrieve import retrieve_model, retrieve_model_files
 from ..shared import calculate_checksum
 from ..repos import FilesAPIRepo
+from .metadata import generate_metadata


 @with_auth
@@ -75,9 +76,6 @@ def download_model(
             )
             # if calculate_checksum(dst_path) != checksum:
             # logger(f"Checksum for {file} does not match")
-        if verbose:
-            logger("Done")
-        return "/".join(dst_path.split("/")[:-1])
     else:
         raise NotImplementedError("Downloading a STAC model is not implemented")
         # logger("Downloading STAC metadata...")
@@ -108,3 +106,9 @@ def download_model(
         # else:
         #     logger("To download assets, set assets=True or -a in the CLI.")
         # return Outputs(dst_path=path)
+    if verbose:
+        logger("Generating README.md ...")
+    generate_metadata(download_path, model)
+    if verbose:
+        logger("Done")
+    return download_path

eotdl-2024.3.14.post2/eotdl/models/ingest.py
@@ -0,0 +1,103 @@
+from pathlib import Path
+import yaml
+import frontmatter
+import markdown
+
+from ..auth import with_auth
+from .metadata import Metadata, generate_metadata
+from ..repos import ModelsAPIRepo
+from ..shared import calculate_checksum
+from ..files import ingest_files
+from .update import update_model
+
+
+def ingest_model(
+    path, verbose=False, logger=print, force_metadata_update=False, sync_metadata=False
+):
+    path = Path(path)
+    if not path.is_dir():
+        raise Exception("Path must be a folder")
+    # if "catalog.json" in [f.name for f in path.iterdir()]:
+    #     return ingest_stac(path / "catalog.json", logger)
+    return ingest_folder(path, verbose, logger, force_metadata_update, sync_metadata)
+
+
+def retrieve_model(metadata, user):
+    repo = ModelsAPIRepo()
+    data, error = repo.retrieve_model(metadata.name)
+    # print(data, error)
+    if data and data["uid"] != user["uid"]:
+        raise Exception("Model already exists.")
+    if error and error == "Model doesn't exist":
+        # create dataset
+        data, error = repo.create_model(metadata.dict(), user)
+        # print(data, error)
+        if error:
+            raise Exception(error)
+        data["id"] = data["model_id"]
+    return data
+
+
+@with_auth
+def ingest_folder(
+    folder,
+    verbose=False,
+    logger=print,
+    force_metadata_update=False,
+    sync_metadata=False,
+    user=None,
+):
+    repo = ModelsAPIRepo()
+    # load metadata
+    try:
+        readme = frontmatter.load(folder.joinpath("README.md"))
+        metadata, content = readme.metadata, readme.content
+        metadata = Metadata(**metadata)
+    except FileNotFoundError:
+        # load metadata (legacy)
+        metadata = (
+            yaml.safe_load(open(folder.joinpath("metadata.yml"), "r").read()) or {}
+        )
+        metadata = Metadata(**metadata)
+        content = None
+    except Exception as e:
+        raise Exception("Error loading metadata: " + str(e))
+    # retrieve model (create if doesn't exist)
+    model = retrieve_model(metadata, user)
+    if content:
+        content = markdown.markdown(content)
+    update_metadata = True
+    if "description" in model:
+        # do not do this if the model is new, only if it already exists
+        update_metadata = check_metadata(
+            model, metadata, content, force_metadata_update, sync_metadata, folder
+        )
+    if update_metadata:
+        update_model(model["id"], metadata, content, user)
+    # ingest files
+    return ingest_files(
+        repo, model["id"], folder, verbose, logger, user, endpoint="models"
+    )
+
+
+def check_metadata(
+    dataset, metadata, content, force_metadata_update, sync_metadata, folder
+):
+    if (
+        dataset["name"] != metadata.name
+        or dataset["description"] != content
+        or dataset["authors"] != metadata.authors
+        or dataset["source"] != metadata.source
+        or dataset["license"] != metadata.license
+        or dataset["thumbnail"] != metadata.thumbnail
+    ):
+        if not force_metadata_update and not sync_metadata:
+            raise Exception(
+                "The provided metadata is not consistent with the current metadata. Use -f to force metadata update or -s to sync your local metadata."
+            )
+        if force_metadata_update:
+            return True
+        if sync_metadata:
+            generate_metadata(str(folder), dataset)
+            return False
+    return False

eotdl-2024.3.14.post2/eotdl/models/metadata.py
@@ -0,0 +1,44 @@
+from pydantic import BaseModel, validator
+from typing import List
+import markdownify
+from pathlib import Path
+
+
+class Metadata(BaseModel):
+    authors: List[str]
+    license: str
+    source: str
+    name: str
+    thumbnail: str
+
+    # validate source is a URL
+    @validator("source")
+    def source_is_url(cls, v):
+        if not v.startswith("http") and not v.startswith("https"):
+            raise ValueError("source must be a URL")
+        return v
+
+    # validate thumbnail is a url
+    @validator("thumbnail")
+    def thumbnail_is_url(cls, v):
+        if not v.startswith("http") and not v.startswith("https"):
+            raise ValueError("thumbnail must be a URL")
+        return v
+
+
+def generate_metadata(download_path, model):
+    with open(download_path + "/README.md", "w") as f:
+        f.write("---\n")
+        f.write(f"name: {model['name']}\n")
+        f.write(f"license: {model['license']}\n")
+        f.write(f"source: {model['source']}\n")
+        f.write(f"thumbnail: {model['thumbnail']}\n")
+        f.write(f"authors:\n")
+        for author in model["authors"]:
+            f.write(f"  - {author}\n")
+        f.write("---\n")
+        f.write(markdownify.markdownify(model["description"], heading_style="ATX"))
+    # remove metadata.yml if exists
+    if Path(download_path + "/metadata.yml").exists():
+        Path(download_path + "/metadata.yml").unlink()
+    return download_path + "/README.md"

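To see what generate_metadata writes, here is a sketch that feeds it an invented record shaped like the fields the function reads (not a real EOTDL model):

    import tempfile

    from eotdl.models.metadata import generate_metadata

    model = {
        "name": "my-model",  # illustrative values throughout
        "license": "MIT",
        "source": "https://www.example.com/my-model",
        "thumbnail": "https://www.example.com/thumbnail.png",
        "authors": ["Jane Doe"],
        "description": "<h1>My model</h1><p>Trained on Sentinel-2 imagery.</p>",
    }

    with tempfile.TemporaryDirectory() as tmp:
        readme_path = generate_metadata(tmp, model)
        print(open(readme_path).read())

The HTML description comes back as markdown ("# My model" ...) because markdownify converts it with heading_style="ATX".
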
eotdl-2024.3.14.post2/eotdl/models/update.py
@@ -0,0 +1,17 @@
+from ..repos import ModelsAPIRepo
+
+
+def update_model(model_id, metadata, content, user):
+    repo = ModelsAPIRepo()
+    data, error = repo.update_model(
+        model_id,
+        metadata.authors,
+        metadata.source,
+        metadata.license,
+        metadata.thumbnail,
+        content,
+        user,
+    )
+    if error:
+        raise Exception(error)
+    return data

{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/APIRepo.py
@@ -1,4 +1,5 @@
 import os
+import requests


 class APIRepo:
@@ -18,3 +19,7 @@ class APIRepo:
        if "id_token" in data:
            return {"Authorization": "Bearer " + data["id_token"]}
        raise Exception("Invalid headers")
+
+    def get_info(self):
+        response = requests.get(self.url)
+        return self.format_response(response)

{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/DatasetsAPIRepo.py
@@ -62,3 +62,19 @@ class DatasetsAPIRepo(APIRepo):
        if response.status_code != 200:
            return None, response.json()["detail"]
        return gpd.GeoDataFrame.from_features(response.json()["features"]), None
+
+    def update_dataset(
+        self, dataset_id, authors, source, license, thumbnail, content, user
+    ):
+        response = requests.put(
+            self.url + f"datasets/{dataset_id}",
+            json={
+                "authors": authors,
+                "source": source,
+                "license": license,
+                "thumbnail": thumbnail,
+                "description": content,
+            },
+            headers=self.generate_headers(user),
+        )
+        return self.format_response(response)

{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/eotdl/repos/ModelsAPIRepo.py
@@ -37,3 +37,19 @@ class ModelsAPIRepo(APIRepo):
            headers=self.generate_headers(user),
        )
        return self.format_response(response)
+
+    def update_model(
+        self, model_id, authors, source, license, thumbnail, content, user
+    ):
+        response = requests.put(
+            self.url + f"models/{model_id}",
+            json={
+                "authors": authors,
+                "source": source,
+                "license": license,
+                "thumbnail": thumbnail,
+                "description": content,
+            },
+            headers=self.generate_headers(user),
+        )
+        return self.format_response(response)

{eotdl-2024.2.15 → eotdl-2024.3.14.post2}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "eotdl"
-version = "2024.02.15"
+version = "2024.03.14-2"
 description = "Earth Observation Training Data Lab"
 authors = ["EarthPulse <it@earthpulse.es>"]
 license = "MIT"
@@ -25,6 +25,9 @@ sentinelhub = "^3.9.1"
 pyyaml = "^6.0.1"
 black = "^23.10.1"
 mypy = "^1.6.1"
+python-frontmatter = "^1.1.0"
+markdown = "^3.5.2"
+markdownify = "^0.11.6"

 [tool.poetry.group.dev.dependencies]
 pytest = "^7.2.2"

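Note that the "2024.03.14-2" here and the "2024.3.14.post2" in the sdist name are the same version: PEP 440 normalization drops the leading zero and reads the "-2" suffix as a post-release. A quick check with the packaging library:

    from packaging.version import Version

    assert str(Version("2024.03.14-2")) == "2024.3.14.post2"
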
eotdl-2024.2.15/eotdl/__init__.py
@@ -1 +0,0 @@
-__version__ = "2024.02.15"

eotdl-2024.2.15/eotdl/datasets/metadata.py
@@ -1,16 +0,0 @@
-from pydantic import BaseModel, validator
-from typing import List
-
-
-class Metadata(BaseModel):
-    authors: List[str]
-    license: str
-    source: str
-    name: str
-
-    # validate source is a URL
-    @validator("source")
-    def source_is_url(cls, v):
-        if not v.startswith("http") and not v.startswith("https"):
-            raise ValueError("source must be a URL")
-        return v

eotdl-2024.2.15/eotdl/models/ingest.py
@@ -1,47 +0,0 @@
-from pathlib import Path
-import yaml
-
-from ..auth import with_auth
-from .metadata import Metadata
-from ..repos import ModelsAPIRepo
-from ..shared import calculate_checksum
-from ..files import ingest_files
-
-
-def ingest_model(path, verbose=False, logger=print):
-    path = Path(path)
-    if not path.is_dir():
-        raise Exception("Path must be a folder")
-    # if "catalog.json" in [f.name for f in path.iterdir()]:
-    #     return ingest_stac(path / "catalog.json", logger)
-    return ingest_folder(path, verbose, logger)
-
-
-def retrieve_model(metadata, user):
-    repo = ModelsAPIRepo()
-    data, error = repo.retrieve_model(metadata.name)
-    # print(data, error)
-    if data and data["uid"] != user["uid"]:
-        raise Exception("Model already exists.")
-    if error and error == "Model doesn't exist":
-        # create dataset
-        data, error = repo.create_model(metadata.dict(), user)
-        # print(data, error)
-        if error:
-            raise Exception(error)
-        data["id"] = data["model_id"]
-    return data["id"]
-
-
-@with_auth
-def ingest_folder(folder, verbose=False, logger=print, user=None):
-    repo = ModelsAPIRepo()
-    # load metadata
-    metadata = yaml.safe_load(open(folder.joinpath("metadata.yml"), "r").read()) or {}
-    metadata = Metadata(**metadata)
-    # retrieve model (create if doesn't exist)
-    model_id = retrieve_model(metadata, user)
-    # ingest files
-    return ingest_files(
-        repo, model_id, folder, verbose, logger, user, endpoint="models"
-    )

eotdl-2024.2.15/eotdl/models/metadata.py
@@ -1,16 +0,0 @@
-from pydantic import BaseModel, validator
-from typing import List
-
-
-class Metadata(BaseModel):
-    authors: List[str]
-    license: str
-    source: str
-    name: str
-
-    # validate source is a URL
-    @validator("source")
-    def source_is_url(cls, v):
-        if not v.startswith("http") and not v.startswith("https"):
-            raise ValueError("source must be a URL")
-        return v

The remaining 59 files listed above with +0 -0 are unchanged between the two versions.