eoapi-cdk 8.1.1 → 8.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.jsii +698 -26
- package/lib/bastion-host/index.js +1 -1
- package/lib/database/index.d.ts +1 -0
- package/lib/database/index.js +5 -5
- package/lib/index.d.ts +2 -0
- package/lib/index.js +3 -1
- package/lib/ingestor-api/index.js +1 -1
- package/lib/stac-api/index.js +1 -1
- package/lib/stac-browser/index.js +1 -1
- package/lib/stac-item-loader/index.d.ts +337 -0
- package/lib/stac-item-loader/index.js +255 -0
- package/lib/stac-item-loader/runtime/Dockerfile +18 -0
- package/lib/stac-item-loader/runtime/pyproject.toml +17 -0
- package/lib/stac-item-loader/runtime/src/stac_item_loader/handler.py +241 -0
- package/lib/stactools-item-generator/index.d.ts +258 -0
- package/lib/stactools-item-generator/index.js +208 -0
- package/lib/stactools-item-generator/runtime/Dockerfile +20 -0
- package/lib/stactools-item-generator/runtime/pyproject.toml +16 -0
- package/lib/stactools-item-generator/runtime/src/stactools_item_generator/__init__.py +2 -0
- package/lib/stactools-item-generator/runtime/src/stactools_item_generator/handler.py +176 -0
- package/lib/stactools-item-generator/runtime/src/stactools_item_generator/item.py +77 -0
- package/lib/tipg-api/index.js +1 -1
- package/lib/titiler-pgstac-api/index.js +1 -1
- package/package.json +1 -1
- package/pyproject.toml +45 -0
- package/uv.lock +1065 -0
- package/.devcontainer/devcontainer.json +0 -4
- package/.github/pull_request_template.md +0 -4
- package/.github/workflows/build.yaml +0 -73
- package/.github/workflows/build_and_release.yaml +0 -13
- package/.github/workflows/conventional-pr.yaml +0 -26
- package/.github/workflows/deploy.yaml +0 -84
- package/.github/workflows/distribute.yaml +0 -46
- package/.github/workflows/docs.yaml +0 -26
- package/.github/workflows/lint.yaml +0 -26
- package/.github/workflows/tox.yaml +0 -26
- package/.nvmrc +0 -1
- package/.pre-commit-config.yaml +0 -23
- package/CHANGELOG.md +0 -471
- package/diagrams/bastion_diagram.excalidraw +0 -1416
- package/diagrams/bastion_diagram.png +0 -0
- package/diagrams/ingestor_diagram.excalidraw +0 -2274
- package/diagrams/ingestor_diagram.png +0 -0
- package/integration_tests/cdk/README.md +0 -55
- package/integration_tests/cdk/app.py +0 -186
- package/integration_tests/cdk/cdk.json +0 -32
- package/integration_tests/cdk/config.py +0 -52
- package/integration_tests/cdk/package-lock.json +0 -42
- package/integration_tests/cdk/package.json +0 -7
- package/integration_tests/cdk/requirements.txt +0 -7
- package/lib/database/lambda/package-lock.json +0 -1324
- package/lib/ingestor-api/runtime/tests/conftest.py +0 -270
- package/lib/ingestor-api/runtime/tests/test_collection.py +0 -87
- package/lib/ingestor-api/runtime/tests/test_collection_endpoint.py +0 -41
- package/lib/ingestor-api/runtime/tests/test_ingestor.py +0 -60
- package/lib/ingestor-api/runtime/tests/test_registration.py +0 -207
- package/lib/ingestor-api/runtime/tests/test_utils.py +0 -35
- package/lib/ingestor-api/runtime/tests/test_validators.py +0 -164
- package/ruff.toml +0 -23
- package/tox.ini +0 -16
- package/tsconfig.tsbuildinfo +0 -1
- /package/lib/{ingestor-api/runtime/tests → stac-item-loader/runtime/src/stac_item_loader}/__init__.py +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "stac-item-loader"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "An application for loading STAC items into a pgstac database"
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "hrodmn", email = "henry@developmentseed.org" }
|
|
7
|
+
]
|
|
8
|
+
requires-python = ">=3.11"
|
|
9
|
+
dependencies = [
|
|
10
|
+
"boto3",
|
|
11
|
+
"pypgstac[psycopg]",
|
|
12
|
+
"stac-pydantic>=3.2.0",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["hatchling"]
|
|
17
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from typing import (
|
|
7
|
+
TYPE_CHECKING,
|
|
8
|
+
Annotated,
|
|
9
|
+
Any,
|
|
10
|
+
DefaultDict,
|
|
11
|
+
Dict,
|
|
12
|
+
List,
|
|
13
|
+
Optional,
|
|
14
|
+
TypedDict,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
import boto3.session
|
|
18
|
+
from pydantic import ValidationError
|
|
19
|
+
from pypgstac.db import PgstacDB
|
|
20
|
+
from pypgstac.load import Loader, Methods
|
|
21
|
+
from stac_pydantic.item import Item
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from aws_lambda_typing.context import Context
|
|
25
|
+
else:
|
|
26
|
+
Context = Annotated[object, "Context object"]
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger()
|
|
29
|
+
if logger.hasHandlers():
|
|
30
|
+
logger.handlers.clear()
|
|
31
|
+
|
|
32
|
+
log_handler = logging.StreamHandler() # <--- Renamed handler variable
|
|
33
|
+
|
|
34
|
+
log_level_name = os.environ.get("LOG_LEVEL", "INFO").upper()
|
|
35
|
+
log_level = logging._nameToLevel.get(log_level_name, logging.INFO)
|
|
36
|
+
logger.setLevel(log_level)
|
|
37
|
+
|
|
38
|
+
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
|
39
|
+
log_handler.setFormatter(formatter)
|
|
40
|
+
logger.addHandler(log_handler)
|
|
41
|
+
|
|
42
|
+
botocore_logger = logging.getLogger("botocore")
|
|
43
|
+
botocore_logger.setLevel(logging.WARN)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class BatchItemFailure(TypedDict):
    """A single entry in the list of failed records returned by the handler."""

    # The handler stores the ``messageId`` of the failed record here.
    itemIdentifier: str


class PartialBatchFailureResponse(TypedDict):
    """Shape of the handler's return value when at least one record failed."""

    # One entry per record that could not be parsed or loaded.
    batchItemFailures: List[BatchItemFailure]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_secret_dict(secret_name: str) -> Dict:
    """Retrieve a secret from AWS Secrets Manager and parse it as JSON.

    Args:
        secret_name (str): identifier of the Secrets Manager secret containing
            the database connection secrets (passed to the API as ``SecretId``)

    Returns:
        secrets (dict): the decoded JSON payload of the secret

    Raises:
        json.JSONDecodeError: if the secret payload is not valid JSON
    """
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(service_name="secretsmanager")

    response = client.get_secret_value(SecretId=secret_name)

    # The payload is read from "SecretString" when present; otherwise the
    # "SecretBinary" field is base64-decoded before being parsed.
    if "SecretString" in response:
        payload = response["SecretString"]
    else:
        payload = base64.b64decode(response["SecretBinary"])

    return json.loads(payload)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_pgstac_dsn() -> str:
    """Build the pgstac connection URI from the secret named by PGSTAC_SECRET_ARN.

    Returns:
        str: a ``postgres://username:password@host:port/dbname`` connection URI
            with the username, password and database name percent-encoded

    Raises:
        EnvironmentError: if PGSTAC_SECRET_ARN is unset or empty
        KeyError: if the secret lacks one of ``username``, ``password``,
            ``host``, ``port`` or ``dbname``
    """
    from urllib.parse import quote

    secret_arn = os.getenv("PGSTAC_SECRET_ARN")
    if not secret_arn:
        logger.error("Environment variable PGSTAC_SECRET_ARN is not set.")
        raise EnvironmentError("PGSTAC_SECRET_ARN must be set")

    secret_dict = get_secret_dict(secret_name=secret_arn)

    # Credentials and the database name may contain URI-reserved characters
    # (e.g. "@", ":", "/", "#"). Percent-encode them so the URI is parsed into
    # the intended components instead of being split at the wrong place.
    username = quote(str(secret_dict["username"]), safe="")
    password = quote(str(secret_dict["password"]), safe="")
    dbname = quote(str(secret_dict["dbname"]), safe="")

    return (
        f"postgres://{username}:{password}"
        f"@{secret_dict['host']}:{secret_dict['port']}/{dbname}"
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def is_s3_event(message_str: str) -> bool:
    """Check if the event data is an S3 event notification.

    A message counts as an S3 event only when it is a JSON object whose
    ``Records`` list contains at least one record with
    ``"eventSource": "aws:s3"``. A payload that merely mentions ``aws:s3``
    somewhere (for example a STAC item holding an ``arn:aws:s3:::...`` value)
    is not treated as an S3 event.

    Args:
        message_str (str): the raw message text

    Returns:
        bool: True if the message is structured as an S3 event notification
    """
    # Cheap pre-filter: without the marker text it cannot be an S3 event.
    if "aws:s3" not in message_str:
        return False

    try:
        payload = json.loads(message_str)
    except ValueError:
        # Not JSON at all; the caller's own parsing reports the error.
        return False

    if not isinstance(payload, dict):
        return False

    records = payload.get("Records")
    if not isinstance(records, list):
        return False

    return any(
        isinstance(record, dict) and record.get("eventSource") == "aws:s3"
        for record in records
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_stac_item_from_s3(bucket_name: str, object_key: str) -> Dict[str, Any]:
    """Fetch STAC item JSON from S3.

    Args:
        bucket_name (str): name of the bucket holding the object
        object_key (str): key of the object to read

    Returns:
        dict: the parsed JSON object

    Raises:
        ValueError: if the object is not valid UTF-8 text, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or its top-level
            JSON value is not an object
        Exception: any other error (e.g. from the S3 client) is logged and
            re-raised unchanged
    """
    session = boto3.session.Session()
    s3_client = session.client("s3")
    s3_uri = f"s3://{bucket_name}/{object_key}"

    try:
        logger.debug(f"Fetching STAC item from {s3_uri}")
        response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
        content = response["Body"].read()

        try:
            stac_item_json = content.decode("utf-8")
        except UnicodeDecodeError as e:
            logger.error(f"Failed to decode S3 object as UTF-8: {s3_uri}")
            raise ValueError("S3 object is not valid UTF-8 text") from e

        stac_item_data = json.loads(stac_item_json)

        # Valid JSON is not necessarily an object; reject arrays/scalars with a
        # clear error instead of failing later on a dict-only operation.
        if not isinstance(stac_item_data, dict):
            raise ValueError(
                "S3 object is not a JSON object: "
                f"got {type(stac_item_data).__name__}"
            )

        logger.debug(
            f"Successfully parsed STAC item from S3: {stac_item_data.get('id', 'unknown')}"
        )

        return stac_item_data

    except Exception as e:
        logger.error(f"Failed to fetch STAC item from {s3_uri}: {e}")
        raise
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def process_s3_event(message_str: str) -> Dict[str, Any]:
    """Process an S3 event notification and return STAC item data.

    The notification must contain exactly one record. The referenced object
    is downloaded with ``get_stac_item_from_s3`` and returned as parsed JSON.

    Args:
        message_str (str): the S3 event notification as a JSON string

    Returns:
        dict: the STAC item read from the referenced S3 object

    Raises:
        ValueError: if the notification has zero or more than one record, is
            missing a required field, or the object key does not end in
            ``.json`` / ``.geojson``
        Exception: any other error is logged and re-raised unchanged
    """
    from urllib.parse import unquote_plus

    try:
        message_data = json.loads(message_str)
        records: List[Dict[str, Any]] = message_data.get("Records", [])
        if not records:
            raise ValueError("no S3 event records!")
        elif len(records) > 1:
            raise ValueError("more than one S3 event record!")

        s3_data = records[0]["s3"]
        bucket_name = s3_data["bucket"]["name"]
        # S3 URL-encodes object keys in event notifications (a space arrives as
        # "+", other special characters as %XX). Decode before using the key,
        # otherwise the lookup targets a key that does not exist.
        object_key = unquote_plus(s3_data["object"]["key"])

        # Validate that this looks like a STAC item file
        if not object_key.endswith((".json", ".geojson")):
            raise ValueError(
                f"S3 object key does not appear to be a STAC item: {object_key}"
            )

        return get_stac_item_from_s3(bucket_name, object_key)

    except KeyError as e:
        logger.error(f"S3 event missing required field: {e}")
        raise ValueError(f"Invalid S3 event structure: missing {e}") from e
    except Exception as e:
        logger.error(f"Failed to process S3 event: {e}")
        raise
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _parse_record_item(record: Dict[str, Any], message_id: str) -> Item:
    """Decode one queue record (an SNS envelope in ``body``) into a validated Item."""
    sqs_body_str = record["body"]
    logger.debug(f"[{message_id}] SQS message body: {sqs_body_str}")
    sns_notification = json.loads(sqs_body_str)

    message_str = sns_notification["Message"]
    logger.debug(f"[{message_id}] SNS Message content: {message_str}")

    # The SNS message is either an S3 event pointing at an item file, or the
    # item JSON itself.
    if is_s3_event(message_str):
        logger.debug(f"[{message_id}] Processing S3 event notification")
        message_data = process_s3_event(message_str)
    else:
        message_data = json.loads(message_str)

    item = Item(**message_data)

    if not item.collection:
        raise KeyError(f"item {item.id} is missing a collection id!")

    return item


def handler(
    event: Dict[str, Any], context: Context
) -> Optional[PartialBatchFailureResponse]:
    """Load the STAC items carried by a batch of queue records into pgstac.

    Each record in ``event["Records"]`` is decoded into a STAC item; items are
    grouped by collection and each group is upserted in its own database
    connection. Records that fail to decode, and all records of a collection
    whose load fails, are reported back by message id.

    Args:
        event: the invocation payload; records are read from its ``Records`` key
        context: the invocation context; only ``aws_request_id`` and
            ``get_remaining_time_in_millis`` are read, and only for logging

    Returns:
        ``{"batchItemFailures": [...]}`` listing the failed message ids, or
        ``None`` when every record succeeded

    Raises:
        EnvironmentError: propagated from ``get_pgstac_dsn`` when
            PGSTAC_SECRET_ARN is not set
    """
    records = event.get("Records", [])
    aws_request_id = getattr(context, "aws_request_id", "N/A")
    remaining_time = getattr(context, "get_remaining_time_in_millis", lambda: "N/A")()

    logger.info(f"Received batch with {len(records)} records.")
    logger.debug(
        f"Lambda Context: RequestId={aws_request_id}, RemainingTime={remaining_time}ms"
    )
    pgstac_dsn = get_pgstac_dsn()

    batch_item_failures: List[BatchItemFailure] = []

    items_by_collection: DefaultDict[str, List[Dict[str, Any]]] = defaultdict(list)
    message_ids_by_collection: DefaultDict[str, List[str]] = defaultdict(list)

    # Phase 1: decode and validate every record, grouping items by collection.
    for record in records:
        message_id = record.get("messageId")
        if not message_id:
            # Without an id the record cannot be listed as a failure, so it is
            # skipped entirely.
            logger.warning("Record missing messageId, cannot report failure for it.")
            continue

        try:
            item = _parse_record_item(record, message_id)

            items_by_collection[item.collection].append(item.model_dump(mode="json"))
            message_ids_by_collection[item.collection].append(message_id)
            logger.debug(f"[{message_id}] Successfully processed.")

        except (ValueError, KeyError, ValidationError, json.JSONDecodeError) as e:
            logger.error(f"[{message_id}] Failed with error: {e}", extra=record)
            batch_item_failures.append({"itemIdentifier": message_id})
        except Exception as e:
            logger.error(f"[{message_id}] Unexpected error: {e}", extra=record)
            batch_item_failures.append({"itemIdentifier": message_id})

    # Phase 2: load each collection's items; a failed load marks every message
    # that contributed to that collection as failed.
    for collection_id, items in items_by_collection.items():
        try:
            with PgstacDB(dsn=pgstac_dsn) as db:
                loader = Loader(db=db)
                logger.info(f"[{collection_id}] loading items into database.")
                loader.load_items(
                    file=items,  # type: ignore
                    insert_mode=Methods.upsert,
                )
                logger.info(f"[{collection_id}] successfully loaded {len(items)} items.")
        except Exception as e:
            logger.error(f"[{collection_id}] failed to load items: {str(e)}")

            batch_item_failures.extend(
                [
                    {"itemIdentifier": message_id}
                    for message_id in message_ids_by_collection[collection_id]
                ]
            )

    if batch_item_failures:
        logger.warning(
            f"Finished processing batch. {len(batch_item_failures)} failure(s) reported."
        )
        logger.info(
            f"Returning failed item identifiers: {[f['itemIdentifier'] for f in batch_item_failures]}"
        )
        return {"batchItemFailures": batch_item_failures}
    else:
        logger.info("Finished processing batch. All records successful.")
        return None
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import { aws_ec2 as ec2, aws_lambda as lambda, aws_sqs as sqs, aws_sns as sns } from "aws-cdk-lib";
|
|
2
|
+
import { Construct } from "constructs";
|
|
3
|
+
import { CustomLambdaFunctionProps } from "../utils";
|
|
4
|
+
/**
|
|
5
|
+
* Configuration properties for the StactoolsItemGenerator construct.
|
|
6
|
+
*
|
|
7
|
+
* The StactoolsItemGenerator is part of a two-phase serverless STAC ingestion pipeline
|
|
8
|
+
* that generates STAC items from source data. This construct creates the
|
|
9
|
+
* infrastructure for the first phase of the pipeline - processing metadata
|
|
10
|
+
* about assets and transforming them into standardized STAC items.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* const generator = new StactoolsItemGenerator(this, 'ItemGenerator', {
|
|
14
|
+
* itemLoadTopicArn: loader.topic.topicArn,
|
|
15
|
+
* lambdaTimeoutSeconds: 120,
|
|
16
|
+
* maxConcurrency: 100,
|
|
17
|
+
* batchSize: 10
|
|
18
|
+
* });
|
|
19
|
+
*/
|
|
20
|
+
export interface StactoolsItemGeneratorProps {
    /**
     * Lambda runtime used by the item generation function.
     *
     * The function is packaged as a Docker container so it can host different
     * stactools packages; choose a runtime version that is compatible with the
     * packages you intend to run.
     *
     * @default lambda.Runtime.PYTHON_3_11
     */
    readonly lambdaRuntime?: lambda.Runtime;
    /**
     * VPC to deploy the lambda into.
     */
    readonly vpc?: ec2.IVpc;
    /**
     * Subnet to deploy the lambda into.
     */
    readonly subnetSelection?: ec2.SubnetSelection;
    /**
     * Timeout of the item generation lambda, in seconds.
     *
     * Budget enough time for one invocation to:
     * - install the stactools package with uvx
     * - download and process the source data
     * - generate the STAC metadata
     * - publish the result to SNS
     *
     * The SQS visibility timeout is set to this value plus 10 seconds.
     *
     * @default 120
     */
    readonly lambdaTimeoutSeconds?: number;
    /**
     * Memory allocated to the lambda function, in MB.
     *
     * Large geospatial datasets or memory-hungry stactools packages may need
     * more. CPU power grows proportionally with the memory setting.
     *
     * @default 1024
     */
    readonly memorySize?: number;
    /**
     * Upper bound on concurrent executions.
     *
     * Limits how many item generation tasks run at the same time. Raising it
     * speeds up large batches but may strain downstream systems or external
     * data sources.
     *
     * @default 100
     */
    readonly maxConcurrency?: number;
    /**
     * Batch size of the SQS event source.
     *
     * Number of generation requests handed to a single lambda invocation.
     * Generation typically handles items one at a time (unlike the loader),
     * so small batch sizes are common.
     *
     * @default 10
     */
    readonly batchSize?: number;
    /**
     * Extra environment variables for the lambda function.
     *
     * Merged with the default environment variables, which include
     * ITEM_LOAD_TOPIC_ARN and LOG_LEVEL. Useful for custom configuration or
     * for passing credentials for external data sources.
     */
    readonly environment?: {
        [key: string]: string;
    };
    /**
     * ARN of the SNS topic that generated items are published to.
     *
     * Typically the topic of a StacItemLoader construct; items published
     * there are picked up for downstream processing and database insertion.
     */
    readonly itemLoadTopicArn: string;
    /**
     * Overrides for the default lambda function properties.
     *
     * @default - defined in the construct.
     */
    readonly lambdaFunctionOptions?: CustomLambdaFunctionProps;
}
|
|
108
|
+
/**
|
|
109
|
+
* AWS CDK Construct for STAC Item Generation Infrastructure
|
|
110
|
+
*
|
|
111
|
+
* The StactoolsItemGenerator creates a serverless, event-driven system for generating
|
|
112
|
+
* STAC (SpatioTemporal Asset Catalog) items from source data. This construct
|
|
113
|
+
* implements the first phase of a two-stage ingestion pipeline that transforms
|
|
114
|
+
* raw geospatial data into standardized STAC metadata.
|
|
115
|
+
*
|
|
116
|
+
* ## Architecture Overview
|
|
117
|
+
*
|
|
118
|
+
* This construct creates the following AWS resources:
|
|
119
|
+
* - **SNS Topic**: Entry point for triggering item generation workflows
|
|
120
|
+
* - **SQS Queue**: Buffers generation requests (visibility timeout is the Lambda timeout plus 10 seconds; 130 seconds with the default timeout)
|
|
121
|
+
* - **Dead Letter Queue**: Captures failed messages after 5 processing attempts
|
|
122
|
+
* - **Lambda Function**: Containerized function that generates STAC items using stactools
|
|
123
|
+
*
|
|
124
|
+
* ## Data Flow
|
|
125
|
+
*
|
|
126
|
+
* 1. External systems publish ItemRequest messages to the SNS topic with metadata about assets
|
|
127
|
+
* 2. The SQS queue buffers these messages and triggers the Lambda function
|
|
128
|
+
* 3. The Lambda function:
|
|
129
|
+
* - Uses `uvx` to install the required stactools package
|
|
130
|
+
* - Executes the `create-item` CLI command with provided arguments
|
|
131
|
+
* - Publishes generated STAC items to the ItemLoad topic
|
|
132
|
+
* 4. Failed processing attempts are sent to the dead letter queue
|
|
133
|
+
*
|
|
134
|
+
* ## Operational Characteristics
|
|
135
|
+
*
|
|
136
|
+
* - **Scalability**: Lambda scales automatically based on queue depth (up to maxConcurrency)
|
|
137
|
+
* - **Flexibility**: Supports any stactools package through dynamic installation
|
|
138
|
+
* - **Reliability**: Dead letter queue captures failed generation attempts
|
|
139
|
+
* - **Isolation**: Each generation task runs in a fresh container environment
|
|
140
|
+
* - **Observability**: CloudWatch logs retained for one week
|
|
141
|
+
*
|
|
142
|
+
* ## Message Schema
|
|
143
|
+
*
|
|
144
|
+
* The function expects messages matching the ItemRequest model:
|
|
145
|
+
*
|
|
146
|
+
* ```json
|
|
147
|
+
* {
|
|
148
|
+
* "package_name": "stactools-glad-global-forest-change",
|
|
149
|
+
* "group_name": "gladglobalforestchange",
|
|
150
|
+
* "create_item_args": [
|
|
151
|
+
* "https://example.com/data.tif"
|
|
152
|
+
* ],
|
|
153
|
+
* "collection_id": "glad-global-forest-change-1.11"
|
|
154
|
+
* }
|
|
155
|
+
* ```
|
|
156
|
+
*
|
|
157
|
+
* ## Usage Example
|
|
158
|
+
*
|
|
159
|
+
* ```typescript
|
|
160
|
+
* // Create item loader first (or get existing topic ARN)
|
|
161
|
+
* const loader = new StacItemLoader(this, 'ItemLoader', {
|
|
162
|
+
* pgstacDb: database
|
|
163
|
+
* });
|
|
164
|
+
*
|
|
165
|
+
* // Create item generator that feeds the loader
|
|
166
|
+
* const generator = new StactoolsItemGenerator(this, 'ItemGenerator', {
|
|
167
|
+
* itemLoadTopicArn: loader.topic.topicArn,
|
|
168
|
+
* lambdaTimeoutSeconds: 120, // Allow time for package installation
|
|
169
|
+
* maxConcurrency: 100, // Control parallel processing
|
|
170
|
+
* batchSize: 10 // Process 10 requests per invocation
|
|
171
|
+
* });
|
|
172
|
+
*
|
|
173
|
+
* // Grant permission to publish to the loader topic
|
|
174
|
+
* loader.topic.grantPublish(generator.lambdaFunction);
|
|
175
|
+
* ```
|
|
176
|
+
*
|
|
177
|
+
* ## Publishing Generation Requests
|
|
178
|
+
*
|
|
179
|
+
* Send messages to the generator topic to trigger item creation:
|
|
180
|
+
*
|
|
181
|
+
* ```bash
|
|
182
|
+
* aws sns publish --topic-arn $ITEM_GEN_TOPIC --message '{
|
|
183
|
+
* "package_name": "stactools-glad-global-forest-change",
|
|
184
|
+
* "group_name": "gladglobalforestchange",
|
|
185
|
+
* "create_item_args": [
|
|
186
|
+
* "https://storage.googleapis.com/earthenginepartners-hansen/GFC-2023-v1.11/Hansen_GFC-2023-v1.11_gain_40N_080W.tif"
|
|
187
|
+
* ],
|
|
188
|
+
* "collection_id": "glad-global-forest-change-1.11"
|
|
189
|
+
* }'
|
|
190
|
+
* ```
|
|
191
|
+
*
|
|
192
|
+
* ## Batch Processing Example
|
|
193
|
+
*
|
|
194
|
+
* For processing many assets, you can loop through URLs:
|
|
195
|
+
*
|
|
196
|
+
* ```bash
|
|
197
|
+
* while IFS= read -r url; do
|
|
198
|
+
* aws sns publish --topic-arn "$ITEM_GEN_TOPIC" --message "{
|
|
199
|
+
* \"package_name\": \"stactools-glad-glclu2020\",
|
|
200
|
+
* \"group_name\": \"gladglclu2020\",
|
|
201
|
+
* \"create_item_args\": [\"$url\"]
|
|
202
|
+
* }"
|
|
203
|
+
* done < urls.txt
|
|
204
|
+
* ```
|
|
205
|
+
*
|
|
206
|
+
* ## Monitoring and Troubleshooting
|
|
207
|
+
*
|
|
208
|
+
* - Monitor Lambda logs: `/aws/lambda/{FunctionName}`
|
|
209
|
+
* - Check dead letter queue for failed generation attempts
|
|
210
|
+
* - Use CloudWatch metrics to track processing rates and errors
|
|
211
|
+
* - Failed items can be replayed from the dead letter queue
|
|
212
|
+
*
|
|
213
|
+
* ## Supported Stactools Packages
|
|
214
|
+
*
|
|
215
|
+
* Any package available on PyPI that follows the stactools plugin pattern
|
|
216
|
+
* can be used. Examples include:
|
|
217
|
+
* - `stactools-glad-global-forest-change`
|
|
218
|
+
* - `stactools-glad-glclu2020`
|
|
219
|
+
* - `stactools-landsat`
|
|
220
|
+
* - `stactools-sentinel2`
|
|
221
|
+
*
|
|
222
|
+
* @see {@link https://github.com/stactools-packages} for available stactools packages
|
|
223
|
+
* @see {@link https://stactools.readthedocs.io/} for stactools documentation
|
|
224
|
+
*/
|
|
225
|
+
export declare class StactoolsItemGenerator extends Construct {
    /**
     * SQS queue buffering item generation requests.
     *
     * Receives the ItemRequest payloads published to the SNS topic. Its
     * visibility timeout is the Lambda timeout plus a buffer, which prevents
     * duplicate processing.
     */
    readonly queue: sqs.Queue;
    /**
     * Dead letter queue for item generation requests that keep failing.
     *
     * A message lands here after 5 failed processing attempts, where it can
     * be inspected and possibly replayed. Helpful when debugging stactools
     * package issues, network failures, or malformed requests.
     */
    readonly deadLetterQueue: sqs.Queue;
    /**
     * SNS topic that accepts item generation requests.
     *
     * External systems trigger STAC item generation by publishing
     * ItemRequest messages here; the topic fans out to the SQS queue.
     */
    readonly topic: sns.Topic;
    /**
     * Containerized Lambda function that generates the STAC items.
     *
     * A Docker-based function that installs stactools packages on demand with
     * uvx, processes the source data, and publishes the generated STAC items
     * to the configured ItemLoad SNS topic.
     */
    readonly lambdaFunction: lambda.DockerImageFunction;
    constructor(scope: Construct, id: string, props: StactoolsItemGeneratorProps);
}
|