rss_da_stac 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rss_da_stac/__init__.py +50 -0
- rss_da_stac/_version.py +34 -0
- rss_da_stac/cli.py +212 -0
- rss_da_stac/uri2l2a.py +177 -0
- rss_da_stac/utils.py +718 -0
- rss_da_stac-0.0.1.dist-info/METADATA +37 -0
- rss_da_stac-0.0.1.dist-info/RECORD +10 -0
- rss_da_stac-0.0.1.dist-info/WHEEL +4 -0
- rss_da_stac-0.0.1.dist-info/entry_points.txt +3 -0
- rss_da_stac-0.0.1.dist-info/licenses/LICENSE +18 -0
rss_da_stac/__init__.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""
|
|
2
|
+
rss_da_stac: A Python package for working with Sentinel-2 STAC items across different providers
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# Import all public functions and classes from utils
|
|
6
|
+
from .utils import (
|
|
7
|
+
CDSE,
|
|
8
|
+
ELEMENT84,
|
|
9
|
+
PLANETARYCOMPUTER,
|
|
10
|
+
ItemNotFoundError,
|
|
11
|
+
ProductIdParseError,
|
|
12
|
+
ProviderNotFoundError,
|
|
13
|
+
S2Scene,
|
|
14
|
+
# Exceptions
|
|
15
|
+
S2StacError,
|
|
16
|
+
StacProvider,
|
|
17
|
+
change_processing_level,
|
|
18
|
+
convert_item,
|
|
19
|
+
extract_stac_item_from_tiff,
|
|
20
|
+
get_item_by_id,
|
|
21
|
+
get_provider,
|
|
22
|
+
parse_cdse_s2_id,
|
|
23
|
+
qvf_to_stac,
|
|
24
|
+
stac_to_qvf,
|
|
25
|
+
read_items
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Define what gets imported with "from rss_da_stac import *"
|
|
30
|
+
__all__ = [
|
|
31
|
+
"StacProvider",
|
|
32
|
+
"S2Scene",
|
|
33
|
+
"CDSE",
|
|
34
|
+
"ELEMENT84",
|
|
35
|
+
"PLANETARYCOMPUTER",
|
|
36
|
+
"parse_cdse_s2_id",
|
|
37
|
+
"get_provider",
|
|
38
|
+
"convert_item",
|
|
39
|
+
"change_processing_level",
|
|
40
|
+
"get_item_by_id",
|
|
41
|
+
"stac_to_qvf",
|
|
42
|
+
"qvf_to_stac",
|
|
43
|
+
"extract_stac_item_from_tiff",
|
|
44
|
+
"read_items",
|
|
45
|
+
# Exceptions
|
|
46
|
+
"S2StacError",
|
|
47
|
+
"ProductIdParseError",
|
|
48
|
+
"ProviderNotFoundError",
|
|
49
|
+
"ItemNotFoundError"
|
|
50
|
+
]
|
rss_da_stac/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.0.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 0, 1)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
rss_da_stac/cli.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Optional, List
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
from pystac import Item, ItemCollection, STACTypeError
|
|
8
|
+
|
|
9
|
+
import rss_da_stac
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def setup_logging(verbose: int):
    """Configure root logging from the CLI verbosity count.

    0 selects WARNING, 1 selects INFO, and every other value selects DEBUG.
    """
    # Anything that is not 0 or 1 falls through to DEBUG, as with -vv and above.
    chosen_level = {0: logging.WARNING, 1: logging.INFO}.get(verbose, logging.DEBUG)

    # Log to stderr so it doesn't interfere with JSON output to stdout.
    logging.basicConfig(
        level=chosen_level,
        format='%(levelname)s: %(message)s',
        stream=sys.stderr,
    )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def read_item(src: Optional[str] = None) -> ItemCollection:
    """
    Load STAC JSON from a file path, or from stdin when no path is given.

    The input is first tried as a single Item (wrapped in a one-element
    ItemCollection); on STACTypeError it is re-read as an ItemCollection.

    This is almost a duplicate of rss_da_stac.read_items, which
    might cause confusion.
    """
    if not src:
        # No path supplied: consume the whole of stdin and parse it once.
        payload = json.loads(sys.stdin.read())
        try:
            return ItemCollection(items=[Item.from_dict(payload)])
        except STACTypeError:
            return ItemCollection.from_dict(payload)

    # Read from file
    try:
        return ItemCollection(items=[Item.from_file(src)])
    except STACTypeError:
        return ItemCollection.from_file(src)
|
|
52
|
+
|
|
53
|
+
app = typer.Typer()
|
|
54
|
+
|
|
55
|
+
@app.callback()
def main(
    verbose: int = typer.Option(
        0,
        "-v",
        "--verbose",
        count=True,
        help="Increase verbosity. Use -v for info, -vv for debug",
    )
):
    """
    CLI tool for STAC item processing.

    Use -v for info logging, -vv for debug logging.
    """
    # Runs before every sub-command so logging is configured once, up front.
    setup_logging(verbose)
|
|
66
|
+
|
|
67
|
+
@app.command()
def qvf2item(
    qvfnames: List[str] = typer.Argument(None, help="One or more valid S2 QVF filenames"),
    file: Optional[str] = typer.Option(None, "--file", "-f", help="File containing QVF filenames (one per line)"),
    provider: str = typer.Option('element84', '--provider', help="STAC provider: element84, planetary-computer, or CDSE"),
    level: str = typer.Option('l2a', '--level', help="Processing level (l2a, l2apre, or l1c)"),
    geom: Optional[str] = typer.Option(None, help="Geometry to help in filtering duplicates"),
    pretty: bool = typer.Option(False, '--pretty', help="Pretty-print the JSON output")
):
    """
    Convert one or more QVF files to a STAC ItemCollection.

    Args:
    qvfnames (List[str]): One or more QVF filenames to process (positional arguments).
    file (Optional[str]): Path to a file containing QVF filenames (one per line).
    provider (str): The STAC provider (element84, planetary-computer, or CDSE).
    level (str): The processing level (l2a, l2apre, or l1c).
    pretty (bool): Whether to pretty-print the JSON output.
    """
    # Positional names come first; names listed in --file are appended after them.
    filenames = list(qvfnames) if qvfnames else []
    if file:
        try:
            with open(file, 'r') as f:
                # Keep only non-empty lines, with surrounding whitespace removed.
                listed = [stripped for stripped in map(str.strip, f) if stripped]
            filenames.extend(listed)
        except Exception as e:
            typer.echo(f"Error reading file '{file}': {e}", err=True)
            raise typer.Exit(1)

    # Ensure at least one filename is provided
    if not filenames:
        typer.echo("Error: No QVF filenames provided. Use positional arguments or the --file option.", err=True)
        raise typer.Exit(1)

    # Accepted spellings (matched case-insensitively) for each STAC provider constant.
    provider_aliases = {
        'element84': rss_da_stac.ELEMENT84,
        'copernicus': rss_da_stac.CDSE,
        'cdse': rss_da_stac.CDSE,
        'planetary-computer': rss_da_stac.PLANETARYCOMPUTER,
        'planetary_computer': rss_da_stac.PLANETARYCOMPUTER,
        'planetarycomputer': rss_da_stac.PLANETARYCOMPUTER,
    }
    stac_prov = provider_aliases.get(provider.lower())
    if stac_prov is None:
        typer.echo(f"Error: provider '{provider}' not recognised. Use 'element84', 'planetary-computer', or 'CDSE'.", err=True)
        raise typer.Exit(1)

    # Resolve each QVF name in turn; the first failure aborts the whole run.
    items = []
    for qvfname in filenames:
        try:
            found = rss_da_stac.qvf_to_stac(qvfname, dst_provider=stac_prov, level=level, geom=geom)
            items.extend(found.items)
        except Exception as e:
            typer.echo(f"Error processing QVF file '{qvfname}': {e}", err=True)
            raise typer.Exit(1)

    # Emit everything as a single ItemCollection on stdout.
    indent = 4 if pretty else None
    print(json.dumps(ItemCollection(items=items).to_dict(), indent=indent))
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@app.command()
def item2qvf(
    json_src: Optional[str] = typer.Argument(None, help="JSON file path"),
    stage: str = typer.Option("adc", "--stage", help="Stage parameter"),
):
    """
    Given a stac item or itemcollection, provide the equivalent qvf compliant
    name for each item. Use the option '--stage' to set the output stage

    Example

    rss_da_stac qvf2item cfmsre_t56kkv_20221223_adbm6.tif|s2stac item2qvf --stage adc

    """
    # One QVF name per input item, printed one per line.
    for stac_item in read_item(json_src).items:
        print(rss_da_stac.stac_to_qvf(stac_item, stage=stage))
|
|
151
|
+
|
|
152
|
+
@app.command()
|
|
153
|
+
def item2item(json_src: Optional[str] = typer.Argument(None, help="JSON file path"),
|
|
154
|
+
provider: str = typer.Option('element84', '--provider', help="STAC provider: element84 or CDSE")):
|
|
155
|
+
"""Convert item to item format."""
|
|
156
|
+
match provider.lower():
|
|
157
|
+
case 'element84':
|
|
158
|
+
stac_prov = rss_da_stac.ELEMENT84
|
|
159
|
+
case 'copernicus':
|
|
160
|
+
stac_prov = rss_da_stac.CDSE
|
|
161
|
+
case 'cdse':
|
|
162
|
+
stac_prov = rss_da_stac.CDSE
|
|
163
|
+
case _:
|
|
164
|
+
typer.echo(f"Error: provider '{provider}' not recognised. Use 'element84' or 'CDSE'", err=True)
|
|
165
|
+
raise typer.Exit(1)
|
|
166
|
+
|
|
167
|
+
item_coll = read_item(json_src)
|
|
168
|
+
new_items = []
|
|
169
|
+
for item in item_coll.items:
|
|
170
|
+
newitem = rss_da_stac.convert_item(item, stac_prov)
|
|
171
|
+
new_items.append(newitem)
|
|
172
|
+
new_item_coll = ItemCollection(items=new_items)
|
|
173
|
+
print(json.dumps(new_item_coll.to_dict()))
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@app.command()
def relevel(json_src: Optional[str] = typer.Argument(None, help="JSON file path")):
    """
    Swap each item to the other processing level (L1C <-> L2A) within its own
    provider and print the results as a STAC ItemCollection.
    """
    item_coll = read_item(json_src)
    new_items = []
    for item in item_coll.items:
        newitem = rss_da_stac.change_processing_level(item)
        if newitem is None:
            # change_processing_level returns None when nothing matches;
            # ItemCollection cannot hold None, so report it and move on.
            typer.echo(f"Warning: no counterpart found for item '{item.id}', skipping", err=True)
            continue
        new_items.append(newitem)
    new_item_coll = ItemCollection(items=new_items)
    print(json.dumps(new_item_coll.to_dict()))
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@app.command()
def histitem(tif_file: str = typer.Argument(..., help="geotiff with item in metadata")):
    # Extract the STAC item from the GeoTIFF and dump it as JSON,
    # asking pystac not to transform hrefs on the way out.
    embedded = rss_da_stac.extract_stac_item_from_tiff(tif_file)
    payload = embedded.to_dict(transform_hrefs=False)
    print(json.dumps(payload))
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@app.command()
def cat(
    files: List[str] = typer.Argument(..., help="List of JSON file paths to concatenate")
):
    """
    Concatenate items from a list of files into a single ItemCollection.

    Args:
    files (List[str]): A list of file paths containing STAC Items or ItemCollections.
    """
    # Gather the items of every input file, in the order the files were given.
    collected = []
    for path in files:
        collected.extend(read_item(path).items)

    # Output the concatenated ItemCollection as JSON
    merged = ItemCollection(items=collected)
    print(json.dumps(merged.to_dict()))
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
if __name__ == "__main__":
|
|
212
|
+
app()
|
rss_da_stac/uri2l2a.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module aims to provide a way of mapping from an L1C item to an L2A item.
|
|
3
|
+
Turns out that this isn't quite as easy as it sounds, since there can be
|
|
4
|
+
more than one product for the same satellite, capture date and tile combination.
|
|
5
|
+
|
|
6
|
+
When you get two possible ones, you can't automatically work out which one you want.
|
|
7
|
+
|
|
8
|
+
For example, if you have an esaid like
|
|
9
|
+
|
|
10
|
+
S2B_MSIL1C_20250917T003659_N0511_R059_T54KZV_20250917T043259
|
|
11
|
+
|
|
12
|
+
and you want to find the L2A equivalent, you have to choose between
|
|
13
|
+
S2B_MSIL2A_20250917T003659_N0511_R059_T54KZV_20250917T044524
|
|
14
|
+
|
|
15
|
+
and
|
|
16
|
+
|
|
17
|
+
S2B_MSIL2A_20250917T003659_N0511_R059_T54KZV_20250917T035150
|
|
18
|
+
|
|
19
|
+
Notice that the product discrimination times don't match the original
|
|
20
|
+
(20250917T043259).
|
|
21
|
+
|
|
22
|
+
The assumption I'm going to make is that the order will be the same. Which
|
|
23
|
+
means you need to find the order of the L1C first, i.e. get the candidates
|
|
24
|
+
sorted by product discrimination times:
|
|
25
|
+
|
|
26
|
+
S2B_MSIL1C_20250917T003659_N0511_R059_T54KZV_20250917T033704
|
|
27
|
+
S2B_MSIL1C_20250917T003659_N0511_R059_T54KZV_20250917T043259
|
|
28
|
+
|
|
29
|
+
and notice that our match is the _second_ of these. Therefore we
|
|
30
|
+
want the _second_ of the L2A candidates. So we want
|
|
31
|
+
|
|
32
|
+
S2B_MSIL2A_20250917T003659_N0511_R059_T54KZV_20250917T044524
|
|
33
|
+
|
|
34
|
+
maybe there is an easier way.
|
|
35
|
+
|
|
36
|
+
"""
|
|
37
|
+
import json
|
|
38
|
+
from typing import List, Optional
|
|
39
|
+
from pystac import Item, ItemCollection
|
|
40
|
+
from pystac_client import Client
|
|
41
|
+
import typer
|
|
42
|
+
from rss_da_stac import StacProvider, get_provider
|
|
43
|
+
from rss_da_stac import ELEMENT84, CDSE
|
|
44
|
+
from rss_da_stac import parse_cdse_s2_id
|
|
45
|
+
|
|
46
|
+
app = typer.Typer()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def filter_by_id(item_coll: ItemCollection, product_uri: str) -> Item:
    """
    Pick the item in ``item_coll`` that corresponds to an L1C product URI.

    The L1C products for the same date and tile are fetched from Element84 and
    ordered by product discriminator time; the position of ``product_uri`` in
    that ordering is then used to index ``item_coll`` sorted the same way.
    This relies on the assumption that both levels are produced in the same
    order.

    We search by the datatake start time. Is that going to be correct?

    Args:
        item_coll: Candidate items (non-empty) to choose from.
        product_uri: L1C-style product URI, with or without a ``.SAFE`` suffix.

    Returns:
        The candidate at the same rank as ``product_uri`` among its L1C siblings.

    Raises:
        ValueError: If ``product_uri`` is not among the L1C search results.
        IndexError: If there are fewer candidates than the L1C rank requires.
    """
    # Catalogue URIs are compared with '.SAFE' stripped, so strip it here too;
    # otherwise a URI carrying the suffix could never be found.
    wanted_uri = product_uri.replace('.SAFE', '')
    details = parse_cdse_s2_id(wanted_uri)

    client = Client.open(ELEMENT84.url)
    search = client.search(
        collections=['sentinel-2-l1c'],
        datetime=details['datatake_start_time'].strftime("%Y-%m-%d"),
        query={
            "grid:code": {"eq": details["grid:code"]}
        }
    )
    l1c_uris = [
        item.properties['s2:product_uri'].replace('.SAFE', '')
        for item in search.item_collection().items
    ]
    # Order by discriminator time, using the URI itself to break ties.
    l1c_uris.sort(key=lambda uri: (parse_cdse_s2_id(uri)['product_disc_time'], uri))
    try:
        index = l1c_uris.index(wanted_uri)
    except ValueError:
        raise ValueError(f"{wanted_uri} not found among L1C products for that date and tile") from None

    # Now order the candidates the same way. CDSE ids carry the discriminator
    # time directly; for other providers it is read from s2:product_uri.
    candidates = list(item_coll.items)
    if get_provider(candidates[0]) == CDSE:
        def disc_time(item):
            return parse_cdse_s2_id(item.id)['product_disc_time']
    else:
        def disc_time(item):
            return parse_cdse_s2_id(item.properties['s2:product_uri'])['product_disc_time']

    # Sort with a key so Items are never compared with each other, which
    # would raise TypeError whenever two discriminator times are equal.
    sorted_items = sorted(candidates, key=disc_time)
    if index >= len(sorted_items):
        raise IndexError(
            f"L1C rank {index} of {wanted_uri} exceeds the {len(sorted_items)} candidate item(s)"
        )
    return sorted_items[index]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def product_uri_to_l2a(product_uri: str, dst_provider: StacProvider=ELEMENT84) -> Optional[Item]:
    """
    Given a product uri, like 'S2B_MSIL1C_20250917T003659_N0511_R059_T54KZV_20250917T043259',
    get the equivalent L2A item from ``dst_provider`` (Element84 by default).

    Args:
        product_uri: Sentinel-2 product URI in the CDSE naming format.
        dst_provider: Provider whose L2A collection is searched.

    Returns:
        The matching L2A item, or None when the search returns nothing. When
        several items match, filter_by_id decides which one is returned.
    """
    details = parse_cdse_s2_id(product_uri)
    client = Client.open(dst_provider.url)
    # Search by the day of the datatake start, restricted to the product's tile.
    search = client.search(
        collections=[dst_provider.l2a],
        datetime=details['datatake_start_time'].strftime("%Y-%m-%d"),
        query={
            "grid:code": {"eq": details["grid:code"]}
        }
    )
    coll = search.item_collection()
    candidates = coll.items
    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]
    # More than one candidate: disambiguate by production order.
    return filter_by_id(coll, product_uri)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@app.command()
|
|
132
|
+
def updateproduct(
|
|
133
|
+
uris: List[str] = typer.Argument(None, help="One or more valid S2 L1C product uris"),
|
|
134
|
+
file: Optional[str] = typer.Option(None, "--file", "-f", help="File containing S2 L1c product uris (one per line)"),
|
|
135
|
+
provider: str = typer.Option('element84', '--provider', help="STAC provider: element84, or CDSE"),
|
|
136
|
+
):
|
|
137
|
+
"""
|
|
138
|
+
given s1c esa ids, convert to s2l2a and change provider to element84
|
|
139
|
+
|
|
140
|
+
"""
|
|
141
|
+
match provider.lower():
|
|
142
|
+
case 'element84':
|
|
143
|
+
stac_prov = ELEMENT84
|
|
144
|
+
case 'copernicus' | 'cdse':
|
|
145
|
+
stac_prov = CDSE
|
|
146
|
+
case _:
|
|
147
|
+
typer.echo(f"Error: provider '{provider}' not recognised. Use 'element84', 'planetary-computer', or 'CDSE'.", err=True)
|
|
148
|
+
raise typer.Exit(1)
|
|
149
|
+
|
|
150
|
+
if uris:
|
|
151
|
+
product_uris = list(uris) # Start with filenames from command-line arguments
|
|
152
|
+
else:
|
|
153
|
+
product_uris = []
|
|
154
|
+
if file:
|
|
155
|
+
try:
|
|
156
|
+
with open(file, 'r') as f:
|
|
157
|
+
file_lines = [line.strip() for line in f if line.strip()] # Read non-empty lines
|
|
158
|
+
product_uris.extend(file_lines) # Add filenames from the file
|
|
159
|
+
except Exception as e:
|
|
160
|
+
typer.echo(f"Error reading file '{file}': {e}", err=True)
|
|
161
|
+
raise typer.Exit(1)
|
|
162
|
+
|
|
163
|
+
# Process each product uri file and collect the resulting STAC Items
|
|
164
|
+
items = []
|
|
165
|
+
for uri in product_uris:
|
|
166
|
+
try:
|
|
167
|
+
item = product_uri_to_l2a(uri, dst_provider=stac_prov)
|
|
168
|
+
items.append(item)
|
|
169
|
+
except Exception as e:
|
|
170
|
+
typer.echo(f"Error processing uri '{uri}': {e}", err=True)
|
|
171
|
+
raise typer.Exit(1)
|
|
172
|
+
item_collection = ItemCollection(items=items)
|
|
173
|
+
output = json.dumps(item_collection.to_dict())
|
|
174
|
+
print(output)
|
|
175
|
+
|
|
176
|
+
if __name__ == "__main__":
|
|
177
|
+
app()
|
rss_da_stac/utils.py
ADDED
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
"""
|
|
2
|
+
rss_da_stac: A Python package for working with Sentinel-2 STAC items across different providers
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
import xml.etree.ElementTree as ET
|
|
10
|
+
import zlib
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from datetime import datetime, timedelta
|
|
13
|
+
from shapely.geometry import Polygon
|
|
14
|
+
from shapely.geometry import shape, mapping
|
|
15
|
+
|
|
16
|
+
from typing import Any, Dict, Optional
|
|
17
|
+
from typing import List
|
|
18
|
+
|
|
19
|
+
import tifftools
|
|
20
|
+
from pystac import Item, ItemCollection, STACTypeError
|
|
21
|
+
from pystac_client import Client
|
|
22
|
+
|
|
23
|
+
# Set up logging
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class S2Scene:
    """
    Represents a Sentinel-2 scene identifier with validation.

    The identifier is validated on construction and stored exactly as given;
    use qvf_style() / mgrs_style() to obtain a normalised spelling.

    Attributes:
        s2scene (str): The Sentinel-2 scene identifier, either in 'tXXYYY'
            format or in 'MGRS-XXYYY' format (prefix matched case-insensitively).

    Raises:
        ValueError: If the identifier matches neither format.

    Examples:
        Using positional argument:
            scene = S2Scene('MGRS-32TNL')

        Using keyword argument:
            scene = S2Scene(s2scene='t01cdn')

    """
    s2scene: str

    def __post_init__(self):
        # Validation only: dataclasses ignore the return value of __post_init__,
        # so nothing is returned and the stored value is left untouched.
        value = self.s2scene
        if value.upper().startswith('MGRS-'):
            value = value[5:]  # Remove the 'MGRS-' prefix
            is_valid = len(value) == 5 and value.isalnum()
        else:
            is_valid = len(value) == 6 and value.startswith('t') and value[1:].isalnum()
        if not is_valid:
            raise ValueError(f'S2 scene format must be either "tXXYYY" or "MGRS-XXYYY", received: {value}')

    def _tile(self):
        """Return the bare five-character tile code, without prefix."""
        if self.s2scene.upper().startswith('MGRS-'):
            return self.s2scene[5:]
        return self.s2scene[1:]

    def qvf_style(self):
        """Return the scene as lowercase 'tXXYYY'."""
        return f"t{self._tile().lower()}"

    def mgrs_style(self):
        """Return the scene as uppercase 'MGRS-XXYYY'."""
        return f"MGRS-{self._tile().upper()}"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class StacProvider:
    """Represents a STAC provider with its endpoints and collection names."""
    name: str
    url: str  # API root; must start with http:// or https://
    # Collection ids per processing level. A provider definition may pass
    # None for a level it has no collection for, hence Optional.
    l1c: Optional[str]
    l2a: str
    l2apre: Optional[str]
    l2aold: Optional[str]
    mgrs_tile: str  # item property key that holds the MGRS tile code

    def __post_init__(self):
        """Validate provider configuration."""
        if not self.url.startswith(('http://', 'https://')):
            raise ValueError(f"Invalid URL for provider {self.name}: {self.url}")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Provider configurations
|
|
95
|
+
# Copernicus Data Space Ecosystem: a single L2A collection id serves all three L2A slots.
CDSE = StacProvider(
    name='CDSE',
    url="https://stac.dataspace.copernicus.eu/v1",
    l1c='sentinel-2-l1c',
    l2a="sentinel-2-l2a",
    l2apre="sentinel-2-l2a",
    l2aold="sentinel-2-l2a",
    mgrs_tile="grid:code",
)

# Element84 Earth Search: separate collection ids for l2a, l2apre and l2aold.
ELEMENT84 = StacProvider(
    name='Element84',
    url="https://earth-search.aws.element84.com/v1",
    l1c='sentinel-2-l1c',
    l2a='sentinel-2-c1-l2a',
    l2apre='sentinel-2-pre-c1-l2a',
    l2aold="sentinel-2-l2a",
    mgrs_tile="grid:code",
)


# Planetary Computer: only an L2A collection is configured, and the tile
# code is stored under a different property key.
PLANETARYCOMPUTER = StacProvider(
    name='planetary-computer',
    url="https://planetarycomputer.microsoft.com/api/stac/v1/",
    l1c=None,
    l2a='sentinel-2-l2a',
    l2apre=None,
    l2aold=None,
    mgrs_tile="s2:mgrs_tile",
)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
#https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class S2StacError(Exception):
    """Base exception for rss_da_stac operations."""


class ProductIdParseError(S2StacError):
    """Raised when a Sentinel-2 product ID cannot be parsed."""


class ProviderNotFoundError(S2StacError):
    """Raised when a provider cannot be determined from an item."""


class ItemNotFoundError(S2StacError):
    """Raised when an item cannot be found in a STAC catalog."""
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def parse_cdse_s2_id(product_id: str) -> Dict[str, Any]:
    """
    Parse Sentinel-2 product ID to extract key metadata.

    Format: S2A_MSIL1C_20170118T001201_N0204_R073_T55HED_20170118T001250[.SAFE]
    This is the CDSE format.

    Args:
        product_id: Sentinel-2 product identifier

    Returns:
        Dictionary with keys 'satellite', 'datatake_start_time' (datetime),
        's2level', 'relative_orbit', 'grid:code' ('MGRS-' + tile),
        'tile_id', 'product_disc_time' (datetime) and 'original_id'.

    Raises:
        ProductIdParseError: If the product ID format is invalid
    """
    if not isinstance(product_id, str):
        raise ProductIdParseError(f"Product ID must be a string, got {type(product_id)}")

    # Remove .SAFE suffix if present
    clean_id = product_id.replace('.SAFE', '')

    # Groups: satellite, level, datatake start, relative orbit, tile, discriminator.
    pattern = r'S2([ABC])_(MSIL1C|MSIL2A)_(\d{8}T\d{6})_N\d{4}_R(\d{3})_T(\w{5})_(\d{8}T\d{6})'
    match = re.match(pattern, clean_id)

    if not match:
        raise ProductIdParseError(f"Could not parse product ID: {product_id}")

    satellite, s2level, datatake_raw, relative_orbit, tile_id, disc_raw = match.groups()

    # The regex only guarantees digit counts; strptime rejects impossible
    # dates and times such as month 13 or hour 25.
    time_format = '%Y%m%dT%H%M%S'
    try:
        datatake_start_time = datetime.strptime(datatake_raw, time_format)
    except ValueError as e:
        raise ProductIdParseError(f"Invalid sensing time format in {product_id}: {e}") from e
    try:
        product_disc_time = datetime.strptime(disc_raw, time_format)
    except ValueError as e:
        # Name the field that actually failed rather than "sensing time".
        raise ProductIdParseError(f"Invalid product discriminator time format in {product_id}: {e}") from e

    return {
        'satellite': satellite,
        'datatake_start_time': datatake_start_time,
        's2level': s2level,
        'relative_orbit': relative_orbit,
        'grid:code': f"MGRS-{tile_id}",
        'tile_id': tile_id,
        'product_disc_time': product_disc_time,
        'original_id': product_id
    }
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def get_provider(item: Item) -> Optional[StacProvider]:
    """
    Determine the STAC provider for a given item.

    The provider is recognised from the URL prefix of the item's links; an
    item without a self href is reported as unknown.

    Args:
        item: STAC Item to analyze

    Returns:
        StacProvider instance or None if provider cannot be determined

    Raises:
        TypeError: If ``item`` is not a pystac.Item
    """
    if not isinstance(item, Item):
        raise TypeError(f"Expected pystac.Item, got {type(item)}")

    if not item.self_href:
        logger.warning(f"Item {item.id} has no self_href")
        return None

    # Checked in this order for every link.
    known_prefixes = (
        ('https://earth-search.aws.element84.com/', ELEMENT84),
        ('https://stac.dataspace.copernicus.eu', CDSE),
        ('https://planetarycomputer.microsoft.com/', PLANETARYCOMPUTER),
    )
    for link in item.links:
        target = link.target
        if not isinstance(target, str):
            # A resolved link holds a STAC object rather than an href string;
            # calling .startswith on it would raise AttributeError.
            continue
        for prefix, provider in known_prefixes:
            if target.startswith(prefix):
                return provider

    logger.warning(f"Unknown provider for URL: {item.self_href}")
    return None
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def convert_item(item: Item, dst_provider: StacProvider, timeout_hours: int = 1) -> Optional[Item]:
    """
    Convert a STAC item from one provider to another.

    The destination provider is searched for items of the equivalent
    collection on the same tile within ``timeout_hours`` either side of the
    source item's datetime.

    Args:
        item: Source STAC Item
        dst_provider: Target provider
        timeout_hours: Time window to search for matching items

    Returns:
        Converted STAC Item or None if not found

    Raises:
        ProviderNotFoundError: If source provider cannot be determined
        S2StacError: If collection type cannot be matched, or the search fails
    """
    src_provider = get_provider(item)
    if src_provider is None:
        raise ProviderNotFoundError(f"Cannot determine provider for item {item.id}")

    # Determine target collection
    if src_provider.l2a == item.collection_id:
        collection = dst_provider.l2a
    elif src_provider.l2aold == item.collection_id:
        collection = dst_provider.l2aold
    elif src_provider.l1c == item.collection_id:
        collection = dst_provider.l1c
    else:
        raise S2StacError(f"Collection {item.collection_id} not matched for provider {src_provider.name}")

    # Search for matching item
    try:
        client = Client.open(dst_provider.url)
        start_time = item.datetime - timedelta(hours=timeout_hours)
        end_time = item.datetime + timedelta(hours=timeout_hours)

        # Normalise the source tile code to the 'MGRS-' prefixed form;
        # planetary-computer stores it without the prefix.
        grid_code = item.properties.get(src_provider.mgrs_tile)
        if src_provider.name == 'planetary-computer':
            grid_code = f"MGRS-{grid_code}"

        # Convert to the destination's spelling. The prefix must be written
        # exactly 'MGRS-' or it is never stripped for planetary-computer.
        dst_code_key = dst_provider.mgrs_tile
        dst_grid_code = grid_code
        if dst_provider.name == 'planetary-computer':
            dst_grid_code = dst_grid_code.replace("MGRS-", '')

        search = client.search(
            collections=[collection],
            datetime=[start_time, end_time],
            query={
                dst_code_key: {"eq": dst_grid_code}
            }
        )

        coll = search.item_collection()
        items = list(coll.items)

        if not items:
            logger.warning(f"No {dst_provider.name} product found for item {item.id}")
            return None

        if len(items) > 1:
            # Several products match: let filter_items choose, passing the source geometry.
            logger.info(f"Multiple {dst_provider.name} products found for item {item.id}, filtering by geom")
            selitem = filter_items(items, item.geometry)
            return selitem
        return items[0]

    except Exception as e:
        logger.error(f"Error searching {dst_provider.name} for item {item.id}: {e}")
        # Chain the cause so the original traceback is preserved.
        raise S2StacError(f"Failed to search provider {dst_provider.name}: {e}") from e
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def change_processing_level(item: Item, timeout_hours: int = 1) -> Optional[Item]:
    """
    Find the counterpart of an item at the other processing level (L1C <-> L2A).

    The search runs against the item's own provider, restricted to the item's
    MGRS tile and to a window of ``timeout_hours`` either side of the item's
    datetime. An item in the provider's ``l2a`` or ``l2aold`` collection is
    mapped to ``l1c``; an item in ``l1c`` is mapped to ``l2a``.

    Args:
        item: Source STAC Item
        timeout_hours: Half-width, in hours, of the time window searched
            around ``item.datetime``

    Returns:
        The first item returned by the search, or None if nothing was found.
        When several items match, a warning is logged and the first is used.

    Raises:
        ProviderNotFoundError: If source provider cannot be determined
        S2StacError: If collection type cannot be matched, or if the search
            itself fails (the original exception is chained as the cause)
    """
    src_provider = get_provider(item)
    if src_provider is None:
        raise ProviderNotFoundError(f"Cannot determine provider for item {item.id}")

    # Determine target collection (opposite level)
    if item.collection_id in [src_provider.l2a, src_provider.l2aold]:
        collection = src_provider.l1c
    elif src_provider.l1c == item.collection_id:
        collection = src_provider.l2a
    else:
        raise S2StacError(f"Collection {item.collection_id} not matched for provider {src_provider.name}")

    try:
        client = Client.open(src_provider.url)
        start_time = item.datetime - timedelta(hours=timeout_hours)
        end_time = item.datetime + timedelta(hours=timeout_hours)

        # The tile property name is provider-specific, so read it from the
        # provider definition instead of assuming "grid:code". Source and
        # target are the same provider here, so the stored value can be
        # queried back unchanged under the same key.
        tile_key = src_provider.mgrs_tile
        tile_value = item.properties.get(tile_key)

        search = client.search(
            collections=[collection],
            datetime=[start_time, end_time],
            query={
                tile_key: {"eq": tile_value}
            }
        )

        coll = search.item_collection()
        items = list(coll.items)

        if not items:
            logger.warning(f"No {src_provider.name} product found for item {item.id}")
            return None

        if len(items) > 1:
            logger.warning(f"Multiple {src_provider.name} products found for item {item.id}, using first")
            itemids = [found.id for found in items]
            logger.info(itemids)

        return items[0]

    except Exception as e:
        logger.error(f"Error changing level for item {item.id}: {e}")
        # Chain the cause so the underlying failure stays visible in tracebacks.
        raise S2StacError(f"Failed to change processing level: {e}") from e
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def get_item_by_id(product_id: str, timeout_hours: int = 1) -> Optional[Item]:
    """
    Look up a single STAC item from its Sentinel-2 product identifier.

    A ``.SAFE`` substring is removed from the identifier, and the provider to
    query is then inferred from the identifier's shape:

    * contains ``MSIL2A`` or ``MSIL1C`` and has exactly six ``_``-separated
      fields -> ``PLANETARYCOMPUTER``
    * contains ``MSIL2A`` or ``MSIL1C`` with any other field count -> ``CDSE``
    * ends with ``L2A`` or ``L1C`` -> ``ELEMENT84``

    Args:
        product_id: Sentinel-2 product identifier
        timeout_hours: Accepted for signature compatibility; not used by the
            ID-based search

    Returns:
        The first item returned for the identifier, or None if not found

    Raises:
        TypeError: If ``product_id`` is not a string
        S2StacError: If the provider cannot be determined from the ID, or if
            the lookup fails
    """
    if not isinstance(product_id, str):
        raise TypeError(f"Product ID must be a string, got {type(product_id)}")

    clean_id = product_id.replace('.SAFE', '')

    # Determine provider based on ID format
    names_msi_level = 'MSIL2A' in clean_id or 'MSIL1C' in clean_id
    if names_msi_level:
        field_count = len(clean_id.split('_'))
        src_provider = PLANETARYCOMPUTER if field_count == 6 else CDSE
    elif clean_id.endswith(('L2A', 'L1C')):
        src_provider = ELEMENT84
    else:
        raise S2StacError(f"Cannot determine provider from ID: {product_id}")

    try:
        client = Client.open(src_provider.url)
        matches = list(client.search(ids=[clean_id]).item_collection().items)

        if matches:
            return matches[0]

        logger.warning(f"No {src_provider.name} product found for ID: {product_id}")
        return None

    except Exception as e:
        logger.error(f"Error retrieving item {product_id}: {e}")
        raise S2StacError(f"Failed to retrieve item {product_id}: {e}")
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def stac_to_qvf(item, stage="adb"):
    """
    Build a QVF-style file name for a Sentinel-2 STAC item.

    Args:
        item: STAC item. Its ``platform`` property, its ``grid:code`` property
            (or ``s2:mgrs_tile`` when ``grid:code`` is absent) and its
            datetime are read.
        stage: Stage code placed in the last field of the name

    Returns:
        A name of the form ``{sensor}_{tile}_{YYYYMMDD}_{stage}m{zone}.tif``,
        where ``zone`` is the third character of the QVF-style tile name

    Raises:
        KeyError: If ``platform`` is missing, or neither tile property exists
        ValueError: If the platform is not sentinel-2a, -2b or -2c
    """
    # TODO: some zone codes are 2 digits
    props = item.properties
    satellite = props['platform']

    # Two property layouts are handled: "grid:code" is passed to S2Scene
    # as-is, while "s2:mgrs_tile" is lower-cased and prefixed with "t" first.
    try:
        tile = S2Scene(props['grid:code'])
    except KeyError:
        tile = S2Scene(f"t{props['s2:mgrs_tile'].lower()}")

    when = item.datetime.strftime("%Y%m%d")

    sensor_by_platform = {
        "sentinel-2a": "cemsre",
        "sentinel-2b": "cfmsre",
        "sentinel-2c": "cgmsre",
    }
    qsensor = sensor_by_platform.get(satellite.lower())
    if qsensor is None:
        raise ValueError(f"satellite {satellite} not recognized")

    # s2scene will be t54kjf, so the zone code is the 3rd character
    scene_name = tile.qvf_style()
    zonecode = scene_name[2]
    return f"{qsensor}_{scene_name}_{when}_{stage}m{zonecode}.tif"
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def qvf_to_stac(
    qvfname: str,
    dst_provider: StacProvider = ELEMENT84,
    level: str = "l2a",
    geom: Optional[str] = None
) -> ItemCollection:
    """
    Search a STAC provider for the item(s) that correspond to a QVF file name,
    e.g. ``cfmsre_t56jlq_20170808_adbm6.tif``.

    The name is split on ``_`` into exactly four fields. The second character
    of the first field selects the platform, the second field is the tile and
    the third field is the acquisition date (``YYYYMMDD``).

    Args:
        qvfname: QVF filename to parse
        dst_provider: STAC provider to search
        level: Name of the provider attribute holding the collection to
            search (e.g., 'l2a')
        geom: Optional geometry as JSON string, used to pick one item when
            more than one matches

    Returns:
        ItemCollection whose items are limited to the requested platform and,
        when ``geom`` is given and several items match, to the single item
        selected by ``filter_items``

    Raises:
        ValueError: If satellite identifier is not recognized, or the name
            does not split into four fields
    """
    what, where, when, _ = qvfname.split('_')
    satellite = what[1]

    platform_by_letter = {
        "e": "sentinel-2a",
        "f": "sentinel-2b",
        "g": "sentinel-2c",
    }
    platform = platform_by_letter.get(satellite.lower())
    if platform is None:
        raise ValueError(f"satellite {satellite} not recognized")

    datestring = datetime.strptime(when, '%Y%m%d').strftime("%Y-%m-%d")

    grid_code = S2Scene(where).mgrs_style()
    # pc uses slightly different format
    if dst_provider.name == 'planetary-computer':
        grid_code = grid_code.replace('MGRS-', '')
    grid_code_key = dst_provider.mgrs_tile

    client = Client.open(dst_provider.url)
    collection = getattr(dst_provider, level)
    logger.debug(f"{collection=}, {grid_code=}")
    tile_query = {grid_code_key: {"eq": grid_code}}
    coll = client.search(
        collections=[collection],
        datetime=[datestring],
        query=tile_query,
    ).item_collection()

    # Filter collections since element84 doesn't use 'in'
    coll.items = [
        candidate
        for candidate in coll.items
        if candidate.properties.get('platform', '').lower() == platform
    ]

    if geom is not None and len(coll.items) > 1:
        coll.items = [filter_items(coll.items, geom)]

    return coll
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def load_polygon_from_geojson(geom_data: str | dict) -> Polygon:
    """
    Build a Shapely polygon from a GeoJSON-like geometry.

    Only the first entry of ``coordinates`` (the exterior ring of a Polygon)
    is used; any further rings are ignored.

    Args:
        geom_data: GeoJSON geometry, either as a JSON string or as a
            dictionary

    Returns:
        Shapely Polygon built from the first coordinate ring

    Raises:
        json.JSONDecodeError: If a string argument is not valid JSON
        KeyError: If the 'coordinates' key is missing
    """
    geometry = json.loads(geom_data) if isinstance(geom_data, str) else geom_data
    exterior_ring = geometry['coordinates'][0]
    return Polygon(exterior_ring)
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def calculate_intersection_over_union(poly1: Polygon, poly2: Polygon) -> float:
    """
    Compute the intersection-over-union (Jaccard index) of two polygons.

    Args:
        poly1: First polygon
        poly2: Second polygon

    Returns:
        Area of the intersection divided by area of the union; higher means
        more similar. Returns 0.0 when the union has zero area.
    """
    overlap_area = poly1.intersection(poly2).area
    combined_area = poly1.union(poly2).area

    # Guard the division: a zero-area union yields a score of 0.0.
    return overlap_area / combined_area if combined_area != 0 else 0.0
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def get_largest_polygon_shapely(geojson):
    """
    Reduce a GeoJSON Polygon or MultiPolygon to a single Polygon.

    Args:
        geojson: A GeoJSON dict (Polygon or MultiPolygon)

    Returns:
        The input object itself when it is already a Polygon; otherwise a
        GeoJSON-style mapping of the MultiPolygon member with the largest area

    Raises:
        ValueError: If the geometry is neither a Polygon nor a MultiPolygon
    """
    geom = shape(geojson)
    kind = geom.geom_type

    if kind == 'MultiPolygon':
        # Keep only the member polygon with the greatest area.
        biggest = max(geom.geoms, key=lambda part: part.area)
        return mapping(biggest)

    if kind != 'Polygon':
        raise ValueError(f"Unsupported geometry type: {kind}")

    # Already a single polygon: hand back the caller's object unchanged.
    return geojson
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def filter_items(items: List[Item], refgeom: str | dict) -> Item:
    """
    Pick the item whose footprint best matches a reference geometry.

    Each item geometry, and the reference geometry, is first reduced to its
    largest polygon, so MultiPolygon footprints are handled. Items are then
    scored by intersection-over-union (IoU) against the reference. Any number
    of candidates (one or more) is accepted. On a tie the later item in
    ``items`` wins, which matches the historical two-item behaviour.

    Args:
        items: Non-empty list of STAC items to choose from
        refgeom: Reference geometry as a GeoJSON string or dictionary

    Returns:
        STAC item with highest IoU score against reference geometry

    Raises:
        ValueError: If ``items`` is empty, or a geometry is neither a Polygon
            nor a MultiPolygon
        json.JSONDecodeError: If refgeom is a string that cannot be parsed as JSON
    """
    if not items:
        raise ValueError(f"Expected at least 1 item for filtering, got {len(items)}")

    # Normalise the reference the same way as the candidates: parse JSON text,
    # then make sure we are using a simple polygon, not a multi polygon.
    if isinstance(refgeom, str):
        refgeom = json.loads(refgeom)
    refpoly = load_polygon_from_geojson(get_largest_polygon_shapely(refgeom))

    best_item = None
    best_iou = 0.0
    for candidate in items:
        footprint = get_largest_polygon_shapely(candidate.geometry)
        score = calculate_intersection_over_union(
            load_polygon_from_geojson(footprint), refpoly
        )
        # Keep the current best only when it is strictly better, so that for
        # two items the outcome is "first if iou1 > iou2, else second".
        if best_item is None or not best_iou > score:
            best_item, best_iou = candidate, score

    return best_item
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def extract_stac_item_from_tiff(tif_file: str) -> Item:
    """
    Extract the STAC item embedded in a TIFF file's SLATS metadata.

    The tags of the first IFD are scanned for either a ``<GDALMetadata>`` XML
    document (whose ``Item`` element named ``SLATS_Metadata2_zipped`` carries
    the payload) or a tag that itself looks like SLATS metadata. The payload
    is base64-decoded, zlib-decompressed and parsed as JSON; the STAC item is
    the JSON string stored under ``['thismeta']['STAC_ITEM']``.

    Args:
        tif_file (str): Path to the TIFF file

    Returns:
        pystac.Item: The STAC item object

    Raises:
        ValueError: If no SLATS metadata found or metadata cannot be processed
        ET.ParseError: If GDAL metadata XML cannot be parsed
    """
    info = tifftools.read_tiff(str(tif_file))

    # should be in ifd 0
    ifd = 0
    ifd_info = info['ifds'][ifd]
    tags = ifd_info.get('tags', {})

    gdal_metadata = None
    slats_metadata = None

    # Scan stops at the first tag that matches either form, so at most one of
    # gdal_metadata / slats_metadata is set when the loop ends.
    for tag_name, tag_data in tags.items():
        if isinstance(tag_data, dict) and 'data' in tag_data:
            tag_value = tag_data['data']

            # Convert tag data to string if it's bytes or a list
            if isinstance(tag_value, list):
                try:
                    tag_value = ''.join([str(x) for x in tag_value])
                except TypeError:
                    tag_value = str(tag_value)
            elif isinstance(tag_value, bytes):
                tag_value = tag_value.decode('utf-8', errors='ignore')

            # Check if this is GDAL metadata XML
            if isinstance(tag_value, str) and tag_value.strip().startswith('<GDALMetadata>'):
                gdal_metadata = tag_value
                break
            # Check if this looks like direct SLATS metadata
            elif ('SLATS_Metadata2_zipped' in str(tag_name) or
                  (isinstance(tag_value, str) and 'SLATS' in tag_value)):
                slats_metadata = tag_value
                break

    # Parse GDAL metadata XML to find SLATS_Metadata2_zipped
    if gdal_metadata and not slats_metadata:
        try:
            root = ET.fromstring(gdal_metadata)
            # Only direct <Item> children of the root element are inspected.
            for item in root.findall('Item'):
                if item.get('name') == 'SLATS_Metadata2_zipped':
                    slats_metadata = item.text
                    break
        except ET.ParseError as e:
            raise ET.ParseError(f"Error parsing GDAL metadata XML: {e}")

    # Also covers an Item element that was found but had empty/None text.
    if not slats_metadata:
        raise ValueError("No SLATS metadata found in TIFF file")

    # Extract and process metadata
    mdata = slats_metadata
    # The payload may have been stored as the text of a Python bytes repr
    # (b'...'); drop the leading b' and the trailing quote.
    if mdata.startswith(r"b'"):
        mdata = mdata[2:-1]

    try:
        # Decompress and parse
        decompressed = zlib.decompress(base64.b64decode(mdata))
        hobj = json.loads(decompressed)
        # STAC_ITEM is itself a JSON-encoded string, hence the second loads.
        json_item = json.loads(hobj['thismeta']['STAC_ITEM'])
        return Item.from_dict(json_item)
    except (zlib.error, base64.binascii.Error, json.JSONDecodeError, KeyError) as e:
        raise ValueError(f"Error processing SLATS metadata: {e}")
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
# concatenate items to create an item collection
|
|
693
|
+
# these may in fact be feature collections
|
|
694
|
+
|
|
695
|
+
def read_items(url: str)-> List[Item]:
    """
    Load STAC items from a URL or file path.

    The target is first read as a single STAC Item. If that fails with
    ``STACTypeError`` it is read as an ItemCollection instead.

    Args:
        url (str): The URL or file path pointing to a STAC Item or ItemCollection.

    Returns:
        List[Item]: A one-element list for a single Item, or the items of the
        ItemCollection.

    Raises:
        STACTypeError: If the input is neither a valid STAC Item nor a valid
            ItemCollection.
    """
    try:
        single_item = Item.from_file(url)
    except STACTypeError:
        # Not a lone Item: fall back to treating it as a collection of items.
        return ItemCollection.from_file(url).items
    return [single_item]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rss_da_stac
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Utilities for JRSRP users to interact with common STAC providers
|
|
5
|
+
Author-email: rdenham <robert.denham@detsi.qld.gov.au>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Requires-Dist: planetary-computer>=1.0.0
|
|
9
|
+
Requires-Dist: pystac-client<0.10.0
|
|
10
|
+
Requires-Dist: pystac>=1.13.0
|
|
11
|
+
Requires-Dist: pytz>=2025.2
|
|
12
|
+
Requires-Dist: rasterio>=1.4.3
|
|
13
|
+
Requires-Dist: shapely>=2.1.1
|
|
14
|
+
Requires-Dist: tifftools>=1.6.1
|
|
15
|
+
Requires-Dist: typer>=0.16.0
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# `rss_da_stac` - Python utilities for interacting with common STAC providers
|
|
19
|
+
|
|
20
|
+
Remote Sensing Scientists associated with the [JRSRP](https://www.jrsrp.org.au/)
|
|
21
|
+
have several established work practices that have been developed over many years,
|
|
22
|
+
and predate STAC and cloud-native Earth Observation (EO) data.
|
|
23
|
+
|
|
24
|
+
This package is designed to help integrate
|
|
25
|
+
their current work approaches with modern data providers via STAC. Of particular
|
|
26
|
+
interest is the interoperability of the [`qvf`](https://jrsrp.gitlab.io/sys/meta_info/)
|
|
27
|
+
name convention and how that relates to STAC. Several utilities are designed
|
|
28
|
+
to work with `qvf` style names, such as
|
|
29
|
+
|
|
30
|
+
* `qvf2item`: Convert one or more QVF files to a STAC ItemCollection
|
|
31
|
+
* `item2qvf`: Given a STAC Item or ItemCollection, provide the equivalent QVF-compliant name for each item.
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
See [notes](docs/index.md) for details on the included scripts and the API.
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
rss_da_stac/__init__.py,sha256=CO4zZ9hwoQr23X0Ln4Kl6iv7NnkRekc7Pp8eZ6vwPfQ,1051
|
|
2
|
+
rss_da_stac/_version.py,sha256=qf6R-J7-UyuABBo8c0HgaquJ8bejVbf07HodXgwAwgQ,704
|
|
3
|
+
rss_da_stac/cli.py,sha256=qOcZ4GW3-_DSoqqAXHXHj80ViACLMytj0c1jL_IT5pY,7594
|
|
4
|
+
rss_da_stac/uri2l2a.py,sha256=h-baK_XUcpyRMtYEo4v5wdisgv48t8PEaETjpzhrf6k,6321
|
|
5
|
+
rss_da_stac/utils.py,sha256=fAq3WT7uzAp0pVUpSq5rC_1JOQQ2KWYlVmSo55dj6LY,22863
|
|
6
|
+
rss_da_stac-0.0.1.dist-info/METADATA,sha256=K59mE8Vs0taMlbM5VJ6-spyl-ImZvdBKhYgmbICaDXg,1425
|
|
7
|
+
rss_da_stac-0.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
8
|
+
rss_da_stac-0.0.1.dist-info/entry_points.txt,sha256=J6QnpfgGs8fP0M5Y9xFX4HNUgc0AsZRyrhw_cVgN_48,83
|
|
9
|
+
rss_da_stac-0.0.1.dist-info/licenses/LICENSE,sha256=VRrbB2lTGdV85ryNqQM4kPuADiUfyfZk0C6TvccpCCc,908
|
|
10
|
+
rss_da_stac-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
MIT No Attribution
|
|
2
|
+
|
|
3
|
+
Copyright 2025 Robert Denham
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so.
|
|
11
|
+
|
|
12
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
13
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
14
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
15
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
16
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
17
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
18
|
+
SOFTWARE.
|