eoapi-cdk 8.2.3 → 8.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.jsii +130 -66
- package/lib/bastion-host/index.js +1 -1
- package/lib/database/index.js +1 -1
- package/lib/index.d.ts +1 -1
- package/lib/index.js +2 -2
- package/lib/ingestor-api/index.js +1 -1
- package/lib/stac-api/index.js +1 -1
- package/lib/stac-browser/index.js +1 -1
- package/lib/{stac-item-loader → stac-loader}/index.d.ts +83 -57
- package/lib/stac-loader/index.js +284 -0
- package/lib/{stac-item-loader → stac-loader}/runtime/Dockerfile +3 -3
- package/lib/{stac-item-loader → stac-loader}/runtime/pyproject.toml +2 -2
- package/lib/{stac-item-loader/runtime/src/stac_item_loader → stac-loader/runtime/src/stac_loader}/handler.py +54 -26
- package/lib/stactools-item-generator/index.d.ts +2 -2
- package/lib/stactools-item-generator/index.js +3 -3
- package/lib/tipg-api/index.js +1 -1
- package/lib/titiler-pgstac-api/index.js +1 -1
- package/package.json +1 -1
- package/pyproject.toml +4 -4
- package/uv.lock +5 -5
- package/lib/stac-item-loader/index.js +0 -255
- /package/lib/{stac-item-loader/runtime/src/stac_item_loader → stac-loader/runtime/src/stac_loader}/__init__.py +0 -0
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var _a, _b;
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.StacItemLoader = exports.StacLoader = void 0;
|
|
5
|
+
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
|
|
6
|
+
const aws_cdk_lib_1 = require("aws-cdk-lib");
|
|
7
|
+
const constructs_1 = require("constructs");
|
|
8
|
+
const path = require("path");
|
|
9
|
+
/**
|
|
10
|
+
* AWS CDK Construct for STAC Object Loading Infrastructure
|
|
11
|
+
*
|
|
12
|
+
* The StacLoader creates a serverless, event-driven system for loading
|
|
13
|
+
* STAC (SpatioTemporal Asset Catalog) objects into a PostgreSQL database with
|
|
14
|
+
* the pgstac extension. This construct supports multiple ingestion pathways
|
|
15
|
+
* for flexible STAC object loading.
|
|
16
|
+
*
|
|
17
|
+
* ## Architecture Overview
|
|
18
|
+
*
|
|
19
|
+
* This construct creates the following AWS resources:
|
|
20
|
+
* - **SNS Topic**: Entry point for STAC objects and S3 event notifications
|
|
21
|
+
* - **SQS Queue**: Buffers and batches messages before processing (visibility timeout is the Lambda timeout plus 10 seconds; 310 seconds by default)
|
|
22
|
+
* - **Dead Letter Queue**: Captures failed loading attempts after 5 retries
|
|
23
|
+
* - **Lambda Function**: Python function that processes batches and inserts objects into pgstac
|
|
24
|
+
*
|
|
25
|
+
* ## Data Flow
|
|
26
|
+
*
|
|
27
|
+
* The loader supports two primary data ingestion patterns:
|
|
28
|
+
*
|
|
29
|
+
* ### Direct STAC Object Publishing
|
|
30
|
+
* 1. STAC objects (JSON) are published directly to the SNS topic in message bodies
|
|
31
|
+
* 2. The SQS queue collects messages and batches them (up to {batchSize} objects or 1 minute window)
|
|
32
|
+
* 3. The Lambda function receives batches, validates objects, and inserts into pgstac
|
|
33
|
+
*
|
|
34
|
+
* ### S3 Event-Driven Loading
|
|
35
|
+
* 1. An S3 bucket is configured to send notifications to the SNS topic when json files are created
|
|
36
|
+
* 2. STAC objects are uploaded to S3 buckets as JSON/GeoJSON files
|
|
37
|
+
* 3. S3 event notifications are sent to the SNS topic when objects are uploaded
|
|
38
|
+
* 4. The Lambda function receives S3 events in the SQS message batch, fetches objects from S3, and loads into pgstac
|
|
39
|
+
*
|
|
40
|
+
* ## Batching Behavior
|
|
41
|
+
*
|
|
42
|
+
* The SQS-to-Lambda integration uses intelligent batching to optimize performance:
|
|
43
|
+
*
|
|
44
|
+
* - **Batch Size**: Lambda waits to receive up to `batchSize` messages (default: 500)
|
|
45
|
+
* - **Batching Window**: If fewer than `batchSize` messages are available, Lambda
|
|
46
|
+
* triggers after `maxBatchingWindowMinutes` minutes (default: 1 minute)
|
|
47
|
+
* - **Trigger Condition**: Lambda executes when EITHER condition is met first
|
|
48
|
+
* - **Concurrency**: Limited to `maxConcurrency` concurrent executions to prevent database overload
|
|
49
|
+
* - **Partial Failures**: Uses `reportBatchItemFailures` to retry only failed objects
|
|
50
|
+
*
|
|
51
|
+
* This approach balances throughput (larger batches = fewer database connections)
|
|
52
|
+
* with latency (time-based triggers prevent indefinite waiting).
|
|
53
|
+
*
|
|
54
|
+
* ## Error Handling and Dead Letter Queue
|
|
55
|
+
*
|
|
56
|
+
* Failed messages are sent to the dead letter queue after 5 processing attempts.
|
|
57
|
+
* **Important**: This construct provides NO automated handling of dead letter queue
|
|
58
|
+
* messages - monitoring, inspection, and reprocessing of failed objects is the
|
|
59
|
+
* responsibility of the implementing application.
|
|
60
|
+
*
|
|
61
|
+
* Consider implementing:
|
|
62
|
+
* - CloudWatch alarms on dead letter queue depth
|
|
63
|
+
* - Manual or automated reprocessing workflows
|
|
64
|
+
* - Logging and alerting for failed objects
|
|
65
|
+
* - Regular cleanup of old dead letter messages (14-day retention)
|
|
66
|
+
*
|
|
67
|
+
* ## Operational Characteristics
|
|
68
|
+
*
|
|
69
|
+
* - **Scalability**: Lambda scales automatically based on queue depth
|
|
70
|
+
* - **Reliability**: Dead letter queue captures failures for debugging
|
|
71
|
+
* - **Efficiency**: Batching optimizes database operations for high throughput
|
|
72
|
+
* - **Security**: Database credentials accessed via AWS Secrets Manager
|
|
73
|
+
* - **Observability**: CloudWatch logs retained for one week
|
|
74
|
+
*
|
|
75
|
+
* ## Prerequisites
|
|
76
|
+
*
|
|
77
|
+
* Before using this construct, ensure:
|
|
78
|
+
* - The pgstac database has collections loaded (objects require existing collection IDs)
|
|
79
|
+
* - Database credentials are stored in AWS Secrets Manager
|
|
80
|
+
* - The pgstac extension is properly installed and configured
|
|
81
|
+
*
|
|
82
|
+
* ## Usage Example
|
|
83
|
+
*
|
|
84
|
+
* ```typescript
|
|
85
|
+
* // Create database first
|
|
86
|
+
* const database = new PgStacDatabase(this, 'Database', {
|
|
87
|
+
* pgstacVersion: '0.9.5'
|
|
88
|
+
* });
|
|
89
|
+
*
|
|
90
|
+
* // Create Object loader
|
|
91
|
+
* const loader = new StacLoader(this, 'StacLoader', {
|
|
92
|
+
* pgstacDb: database,
|
|
93
|
+
* batchSize: 1000, // Process up to 1000 objects per batch
|
|
94
|
+
* maxBatchingWindowMinutes: 1, // Wait max 1 minute to fill batch
|
|
95
|
+
* lambdaTimeoutSeconds: 300 // Allow up to 300 seconds for database operations
|
|
96
|
+
* });
|
|
97
|
+
*
|
|
98
|
+
* // The topic ARN can be used by other services to publish objects
|
|
99
|
+
* new CfnOutput(this, 'LoaderTopicArn', {
|
|
100
|
+
* value: loader.topic.topicArn
|
|
101
|
+
* });
|
|
102
|
+
* ```
|
|
103
|
+
*
|
|
104
|
+
* ## Direct Object Publishing
|
|
105
|
+
*
|
|
106
|
+
* External services can publish STAC objects directly to the topic:
|
|
107
|
+
*
|
|
108
|
+
* ```bash
|
|
109
|
+
* aws sns publish --topic-arn $STAC_LOAD_TOPIC --message '{
|
|
110
|
+
* "id": "example-collection",
|
|
111
|
+
* "type": "Collection",
|
|
112
|
+
* "title": "Example Collection",
|
|
113
|
+
* "description": "An example collection",
|
|
114
|
+
* "license": "proprietary",
|
|
115
|
+
* "extent": {
|
|
116
|
+
* "spatial": {"bbox": [[-180, -90, 180, 90]]},
|
|
117
|
+
* "temporal": {"interval": [[null, null]]}
|
|
118
|
+
* },
|
|
119
|
+
* "stac_version": "1.1.0"
|
|
120
|
+
* }'
|
|
121
|
+
*
|
|
122
|
+
* aws sns publish --topic-arn $STAC_LOAD_TOPIC --message '{
|
|
123
|
+
* "type": "Feature",
|
|
124
|
+
* "stac_version": "1.0.0",
|
|
125
|
+
* "id": "example-item",
|
|
126
|
+
* "properties": {"datetime": "2021-01-01T00:00:00Z"},
|
|
127
|
+
* "geometry": {"type": "Polygon", "coordinates": [...]},
|
|
128
|
+
* "collection": "example-collection"
|
|
129
|
+
* }'
|
|
130
|
+
*
|
|
131
|
+
*
|
|
132
|
+
* ```
|
|
133
|
+
*
|
|
134
|
+
* ## S3 Event Configuration
|
|
135
|
+
*
|
|
136
|
+
* To enable S3 event-driven loading, configure S3 bucket notifications to send
|
|
137
|
+
* events to the SNS topic when STAC objects (.json or .geojson files) are uploaded:
|
|
138
|
+
*
|
|
139
|
+
* ```typescript
|
|
140
|
+
* // Configure S3 bucket to send notifications to the loader topic
|
|
141
|
+
* bucket.addEventNotification(
|
|
142
|
+
* s3.EventType.OBJECT_CREATED,
|
|
143
|
+
* new s3n.SnsDestination(loader.topic),
|
|
144
|
+
* { suffix: '.json' }
|
|
145
|
+
* );
|
|
146
|
+
*
|
|
147
|
+
* bucket.addEventNotification(
|
|
148
|
+
* s3.EventType.OBJECT_CREATED,
|
|
149
|
+
* new s3n.SnsDestination(loader.topic),
|
|
150
|
+
* { suffix: '.geojson' }
|
|
151
|
+
* );
|
|
152
|
+
* ```
|
|
153
|
+
*
|
|
154
|
+
* When STAC objects are uploaded to the configured S3 bucket, the loader will:
|
|
155
|
+
* 1. Receive S3 event notifications via SNS
|
|
156
|
+
* 2. Fetch the STAC JSON from S3
|
|
157
|
+
* 3. Validate and load the objects into the pgstac database
|
|
158
|
+
*
|
|
159
|
+
* ## Monitoring and Troubleshooting
|
|
160
|
+
*
|
|
161
|
+
* - Monitor Lambda logs: `/aws/lambda/{FunctionName}`
|
|
162
|
+
* - **Dead Letter Queue**: Check for failed objects - **no automated handling provided**
|
|
163
|
+
* - Use batch item failure reporting (`reportBatchItemFailures`) so only failed messages in a batch are retried
|
|
164
|
+
* - CloudWatch metrics available for queue depth and Lambda performance
|
|
165
|
+
*
|
|
166
|
+
* ### Dead Letter Queue Management
|
|
167
|
+
*
|
|
168
|
+
* Applications must implement their own dead letter queue monitoring:
|
|
169
|
+
*
|
|
170
|
+
* ```typescript
|
|
171
|
+
* // Example: CloudWatch alarm for dead letter queue depth
|
|
172
|
+
* new cloudwatch.Alarm(this, 'DeadLetterAlarm', {
|
|
173
|
+
* metric: loader.deadLetterQueue.metricApproximateNumberOfVisibleMessages(),
|
|
174
|
+
* threshold: 1,
|
|
175
|
+
* evaluationPeriods: 1
|
|
176
|
+
* });
|
|
177
|
+
*
|
|
178
|
+
* // Example: Lambda to reprocess dead letter messages
|
|
179
|
+
* const reprocessFunction = new lambda.Function(this, 'Reprocess', {
|
|
180
|
+
* // Implementation to fetch and republish failed messages
|
|
181
|
+
* });
|
|
182
|
+
* ```
|
|
183
|
+
*
|
|
184
|
+
*/
|
|
185
|
+
class StacLoader extends constructs_1.Construct {
    /**
     * Builds the loader pipeline: an SNS topic subscribed by an SQS queue
     * (backed by a dead letter queue) that is the event source of a Lambda
     * function, plus four stack outputs describing those resources.
     *
     * @param scope Parent construct.
     * @param id Construct id; also used as the prefix of the topic display name.
     * @param props Loader configuration. `pgstacDb` is read for its
     *   `pgstacVersion` and `pgstacSecret`. Optional settings fall back to:
     *   `lambdaTimeoutSeconds` 300, `lambdaRuntime` PYTHON_3_11,
     *   `maxConcurrency` 2, `memorySize` 1024, `batchSize` 500,
     *   `maxBatchingWindowMinutes` 1.
     */
    constructor(scope, id, props) {
        super(scope, id);
        const {
            aws_lambda: lambda,
            aws_lambda_event_sources: eventSources,
            aws_logs: logs,
            aws_sns: sns,
            aws_sns_subscriptions: subscriptions,
            aws_sqs: sqs,
            CfnOutput,
            Duration,
            Stack,
        } = aws_cdk_lib_1;
        const { pgstacDb } = props;
        const functionTimeoutSeconds = props.lambdaTimeoutSeconds ?? 300;
        const runtime = props.lambdaRuntime ?? lambda.Runtime.PYTHON_3_11;
        const concurrencyLimit = props.maxConcurrency ?? 2;

        // Messages that exhaust their receive attempts on the main queue end up here.
        const deadLetterQueue = new sqs.Queue(this, "DeadLetterQueue", {
            retentionPeriod: Duration.days(14),
        });
        this.deadLetterQueue = deadLetterQueue;

        // The visibility timeout is kept 10 seconds above the function timeout.
        const queue = new sqs.Queue(this, "Queue", {
            visibilityTimeout: Duration.seconds(functionTimeoutSeconds + 10),
            encryption: sqs.QueueEncryption.SQS_MANAGED,
            deadLetterQueue: {
                maxReceiveCount: 5,
                queue: deadLetterQueue,
            },
        });
        this.queue = queue;

        // Topic that publishers target; every message is forwarded to the queue.
        const topic = new sns.Topic(this, "Topic", {
            displayName: `${id}-StacLoaderTopic`,
        });
        this.topic = topic;
        topic.addSubscription(new subscriptions.SqsSubscription(queue));

        // The function code is produced by a Docker build rooted one directory
        // above this module; the Python version is the runtime name with its
        // "python" prefix stripped.
        const dockerBuildOptions = {
            file: "stac-loader/runtime/Dockerfile",
            platform: "linux/amd64",
            buildArgs: {
                PYTHON_VERSION: runtime.toString().replace("python", ""),
                PGSTAC_VERSION: pgstacDb.pgstacVersion,
            },
        };
        const loaderFunction = new lambda.Function(this, "Function", {
            runtime,
            handler: "stac_loader.handler.handler",
            vpc: props.vpc,
            vpcSubnets: props.subnetSelection,
            code: lambda.Code.fromDockerBuild(path.join(__dirname, ".."), dockerBuildOptions),
            memorySize: props.memorySize ?? 1024,
            timeout: Duration.seconds(functionTimeoutSeconds),
            reservedConcurrentExecutions: concurrencyLimit,
            logRetention: logs.RetentionDays.ONE_WEEK,
            environment: {
                PGSTAC_SECRET_ARN: pgstacDb.pgstacSecret.secretArn,
                ...props.environment,
            },
            // Spread last so caller-supplied options replace any key set above.
            ...props.lambdaFunctionOptions,
        });
        this.lambdaFunction = loaderFunction;

        // The function reads the database secret referenced by PGSTAC_SECRET_ARN.
        pgstacDb.pgstacSecret.grantRead(loaderFunction);

        // Feed the function from the queue in batches, reporting per-item failures.
        loaderFunction.addEventSource(new eventSources.SqsEventSource(queue, {
            batchSize: props.batchSize ?? 500,
            maxBatchingWindow: Duration.minutes(props.maxBatchingWindowMinutes ?? 1),
            maxConcurrency: concurrencyLimit,
            reportBatchItemFailures: true,
        }));

        // Stack outputs; export names are prefixed with the owning stack's name.
        const stackName = Stack.of(this).stackName;
        const outputs = [
            ["TopicArn", topic.topicArn, "ARN of the StacLoader SNS Topic", "-stac-loader-topic-arn"],
            ["QueueUrl", queue.queueUrl, "URL of the StacLoader SQS Queue", "-stac-loader-queue-url"],
            ["DeadLetterQueueUrl", deadLetterQueue.queueUrl, "URL of the StacLoader Dead Letter Queue", "-stac-loader-deadletter-queue-url"],
            ["FunctionName", loaderFunction.functionName, "Name of the StacLoader Lambda Function", "-stac-loader-function-name"],
        ];
        for (const [outputId, value, description, exportSuffix] of outputs) {
            new CfnOutput(this, outputId, {
                value,
                description,
                exportName: `${stackName}${exportSuffix}`,
            });
        }
    }
}
|
|
268
|
+
exports.StacLoader = StacLoader;
|
|
269
|
+
_a = JSII_RTTI_SYMBOL_1;
|
|
270
|
+
StacLoader[_a] = { fqn: "eoapi-cdk.StacLoader", version: "8.3.1" };
|
|
271
|
+
/**
 * Deprecated subclass of StacLoader.
 *
 * Constructing it emits a deprecation notice through `console.warn` and then
 * forwards the same arguments, unchanged, to the StacLoader constructor.
 *
 * @deprecated Use StacLoader instead. StacItemLoader will be removed in a future version.
 */
class StacItemLoader extends StacLoader {
    constructor(scope, id, props) {
        // The notice is emitted before super() runs, so it appears even if
        // the parent constructor throws.
        const deprecationNotice = "StacItemLoader is deprecated. Please use StacLoader instead. " +
            "StacItemLoader will be removed in a future version.";
        console.warn(deprecationNotice);
        super(scope, id, props);
    }
}
|
|
281
|
+
exports.StacItemLoader = StacItemLoader;
|
|
282
|
+
_b = JSII_RTTI_SYMBOL_1;
|
|
283
|
+
StacItemLoader[_b] = { fqn: "eoapi-cdk.StacItemLoader", version: "8.3.1" };
|
|
284
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";;;;;AAAA,6CAWqB;AACrB,2CAAuC;AAEvC,6BAA6B;AA0I7B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+KG;AACH,MAAa,UAAW,SAAQ,sBAAS;IAoDvC,YAAY,KAAgB,EAAE,EAAU,EAAE,KAAsB;QAC9D,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAEjB,MAAM,cAAc,GAAG,KAAK,CAAC,oBAAoB,IAAI,GAAG,CAAC;QACzD,MAAM,aAAa,GAAG,KAAK,CAAC,aAAa,IAAI,wBAAM,CAAC,OAAO,CAAC,WAAW,CAAC;QACxE,MAAM,cAAc,GAAG,KAAK,CAAC,cAAc,IAAI,CAAC,CAAC;QAEjD,2BAA2B;QAC3B,IAAI,CAAC,eAAe,GAAG,IAAI,qBAAG,CAAC,KAAK,CAAC,IAAI,EAAE,iBAAiB,EAAE;YAC5D,eAAe,EAAE,sBAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;SACnC,CAAC,CAAC;QAEH,oBAAoB;QACpB,IAAI,CAAC,KAAK,GAAG,IAAI,qBAAG,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE;YACxC,iBAAiB,EAAE,sBAAQ,CAAC,OAAO,CAAC,cAAc,GAAG,EAAE,CAAC;YACxD,UAAU,EAAE,qBAAG,CAAC,eAAe,CAAC,WAAW;YAC3C,eAAe,EAAE;gBACf,eAAe,EAAE,CAAC;gBAClB,KAAK,EAAE,IAAI,CAAC,eAAe;aAC5B;SACF,CAAC,CAAC;QAEH,mBAAmB;QACnB,IAAI,CAAC,KAAK,GAAG,IAAI,qBAAG,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE;YACxC,WAAW,EAAE,GAAG,EAAE,kBAAkB;SACrC,CAAC,CAAC;QAEH,mCAAmC;QACnC,IAAI,CAAC,KAAK,CAAC,eAAe,CACxB,IAAI,mCAAgB,CAAC,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,CACjD,CAAC;QAEF,6BAA6B;QAC7B,IAAI,CAAC,cAAc,GAAG,IAAI,wBAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE;YAC1D,OAAO,EAAE,aAAa;YACtB,OAAO,EAAE,6BAA6B;YACtC,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,UAAU,EAAE,KAAK,CAAC,eAAe;YACjC,IAAI,EAAE,wBAAM,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE;gBAC5D,IAAI,EAAE,gCAAgC;gBACtC,QAAQ,EAAE,aAAa;gBACvB,SAAS,EAAE;oBACT,cAAc,EAAE,aAAa,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;oBAC9D,cAAc,EAAE,KAAK,CAAC,QAAQ,CAAC,aAAa;iBAC7C;aACF,CAAC;YACF,UAAU,EAAE,KAAK,CAAC,UAAU,IAAI,IAAI;YACpC,OAAO,EAAE,sBAAQ,CAAC,OAAO,CAAC,cAAc,CAAC;YACzC,4BAA4B,EAAE,cAAc;YAC5C,YAAY,EAAE,sBAAI,CAAC,aAAa,CAAC,QAAQ;YACzC,WAAW,EAAE;gBACX,iBAAiB,EAAE,KAAK,CAAC,QAAQ,CAAC,
YAAY,CAAC,SAAS;gBACxD,GAAG,KAAK,CAAC,WAAW;aACrB;YACD,iEAAiE;YACjE,GAAG,KAAK,CAAC,qBAAqB;SAC/B,CAAC,CAAC;QAEH,gDAAgD;QAChD,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAE3D,qCAAqC;QACrC,IAAI,CAAC,cAAc,CAAC,cAAc,CAChC,IAAI,sCAAkB,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,EAAE;YAChD,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,GAAG;YACjC,iBAAiB,EAAE,sBAAQ,CAAC,OAAO,CACjC,KAAK,CAAC,wBAAwB,IAAI,CAAC,CACpC;YACD,cAAc,EAAE,cAAc;YAC9B,uBAAuB,EAAE,IAAI;SAC9B,CAAC,CACH,CAAC;QAEF,iBAAiB;QACjB,MAAM,YAAY,GAAG,mBAAK,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC;QAC9C,IAAI,uBAAS,CAAC,IAAI,EAAE,UAAU,EAAE;YAC9B,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ;YAC1B,WAAW,EAAE,iCAAiC;YAC9C,UAAU,EAAE,GAAG,YAAY,wBAAwB;SACpD,CAAC,CAAC;QAEH,IAAI,uBAAS,CAAC,IAAI,EAAE,UAAU,EAAE;YAC9B,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ;YAC1B,WAAW,EAAE,iCAAiC;YAC9C,UAAU,EAAE,GAAG,YAAY,wBAAwB;SACpD,CAAC,CAAC;QAEH,IAAI,uBAAS,CAAC,IAAI,EAAE,oBAAoB,EAAE;YACxC,KAAK,EAAE,IAAI,CAAC,eAAe,CAAC,QAAQ;YACpC,WAAW,EAAE,yCAAyC;YACtD,UAAU,EAAE,GAAG,YAAY,mCAAmC;SAC/D,CAAC,CAAC;QAEH,IAAI,uBAAS,CAAC,IAAI,EAAE,cAAc,EAAE;YAClC,KAAK,EAAE,IAAI,CAAC,cAAc,CAAC,YAAY;YACvC,WAAW,EAAE,wCAAwC;YACrD,UAAU,EAAE,GAAG,YAAY,4BAA4B;SACxD,CAAC,CAAC;IACL,CAAC;;AAtJH,gCAuJC;;;AAED;;GAEG;AACH,MAAa,cAAe,SAAQ,UAAU;IAC5C,YAAY,KAAgB,EAAE,EAAU,EAAE,KAAsB;QAC9D,OAAO,CAAC,IAAI,CACV,+DAA+D;YAC7D,qDAAqD,CACxD,CAAC;QAEF,KAAK,CAAC,KAAK,EAAE,EAAE,EAAE,KAAK,CAAC,CAAC;IAC1B,CAAC;;AARH,wCASC","sourcesContent":["import {\n  aws_ec2 as ec2,\n  aws_lambda as lambda,\n  aws_sqs as sqs,\n  aws_sns as sns,\n  aws_sns_subscriptions as snsSubscriptions,\n  aws_lambda_event_sources as lambdaEventSources,\n  aws_logs as logs,\n  Duration,\n  CfnOutput,\n  Stack,\n} from \"aws-cdk-lib\";\nimport { Construct } from \"constructs\";\nimport { PgStacDatabase } from \"../database\";\nimport * as path from \"path\";\nimport { CustomLambdaFunctionProps } from \"../utils\";\n\n/**\n * Configuration properties for the StacLoader construct.\n *\n * The StacLoader is part of a two-phase serverless STAC 
ingestion pipeline\n * that loads STAC collections and items into a pgstac database. This construct creates\n * the infrastructure for receiving STAC objects from multiple sources:\n * 1. SNS messages containing STAC metadata (direct ingestion)\n * 2. S3 event notifications for STAC objects uploaded to S3 buckets\n *\n * Objects from both sources are batched and inserted into PostgreSQL with the pgstac extension.\n *\n * @example\n * const loader = new StacLoader(this, 'StacLoader', {\n *   pgstacDb: database,\n *   batchSize: 1000,\n *   maxBatchingWindowMinutes: 1,\n *   lambdaTimeoutSeconds: 300\n * });\n */\nexport interface StacLoaderProps {\n  /**\n   * The PgSTAC database instance to load data into.\n   *\n   * This database must have the pgstac extension installed and be properly\n   * configured with collections before objects can be loaded. The loader will\n   * use AWS Secrets Manager to securely access database credentials.\n   */\n  readonly pgstacDb: PgStacDatabase;\n\n  /**\n   * VPC into which the lambda should be deployed.\n   */\n  readonly vpc?: ec2.IVpc;\n\n  /**\n   * Subnet into which the lambda should be deployed.\n   */\n  readonly subnetSelection?: ec2.SubnetSelection;\n\n  /**\n   * The lambda runtime to use for the item loading function.\n   *\n   * The function is implemented in Python and uses pypgstac for database\n   * operations. Ensure the runtime version is compatible with the pgstac\n   * version specified in the database configuration.\n   *\n   * @default lambda.Runtime.PYTHON_3_11\n   */\n  readonly lambdaRuntime?: lambda.Runtime;\n\n  /**\n   * The timeout for the item load lambda in seconds.\n   *\n   * This should accommodate the time needed to process up to `batchSize`\n   * objects and perform database insertions. 
The SQS visibility timeout\n   * will be set to this value plus 10 seconds.\n   *\n   * @default 300\n   */\n  readonly lambdaTimeoutSeconds?: number;\n\n  /**\n   * Memory size for the lambda function in MB.\n   *\n   * Higher memory allocation may improve performance when processing\n   * large batches of STAC objects, especially for memory-intensive\n   * database operations.\n   *\n   * @default 1024\n   */\n  readonly memorySize?: number;\n\n  /**\n   * SQS batch size for lambda event source.\n   *\n   * This determines the maximum number of STAC objects that will be\n   * processed together in a single lambda invocation. Larger batch\n   * sizes improve database insertion efficiency but require more\n   * memory and longer processing time.\n   *\n   * **Batching Behavior**: SQS will wait to accumulate up to this many\n   * messages before triggering the Lambda, OR until the maxBatchingWindow\n   * timeout is reached, whichever comes first. This creates an efficient\n   * balance between throughput and latency.\n   *\n   * @default 500\n   */\n  readonly batchSize?: number;\n\n  /**\n   * Maximum batching window in minutes.\n   *\n   * Even if the batch size isn't reached, the lambda will be triggered\n   * after this time period to ensure timely processing of objects.\n   * This prevents objects from waiting indefinitely in low-volume scenarios.\n   *\n   * **Important**: This timeout works in conjunction with batchSize - SQS\n   * will trigger the Lambda when EITHER the batch size is reached OR this\n   * time window expires, ensuring objects are processed in a timely manner\n   * regardless of volume.\n   *\n   * @default 1\n   */\n  readonly maxBatchingWindowMinutes?: number;\n\n  /**\n   * Maximum concurrent executions for the StacLoader Lambda function\n   *\n   * This limit will be applied to the Lambda function and will control how\n   * many concurrent batches will be released from the SQS queue.\n   *\n   * @default 2\n   */\n  readonly 
maxConcurrency?: number;\n\n  /**\n   * Additional environment variables for the lambda function.\n   *\n   * These will be merged with the default environment variables including\n   * PGSTAC_SECRET_ARN. Use this for custom configuration or debugging flags.\n   *\n   * If you want to enable the option to upload a boilerplate collection record\n   * in the event that the collection record does not yet exist for an item that\n   * is set to be loaded, set the variable `\"CREATE_COLLECTIONS_IF_MISSING\": \"TRUE\"`.\n   */\n  readonly environment?: { [key: string]: string };\n\n  /**\n   * Can be used to override the default lambda function properties.\n   *\n   * @default - defined in the construct.\n   */\n  readonly lambdaFunctionOptions?: CustomLambdaFunctionProps;\n}\n\n/**\n * AWS CDK Construct for STAC Object Loading Infrastructure\n *\n * The StacLoader creates a serverless, event-driven system for loading\n * STAC (SpatioTemporal Asset Catalog) objects into a PostgreSQL database with\n * the pgstac extension. This construct supports multiple ingestion pathways\n * for flexible STAC object loading.\n *\n * ## Architecture Overview\n *\n * This construct creates the following AWS resources:\n * - **SNS Topic**: Entry point for STAC objects and S3 event notifications\n * - **SQS Queue**: Buffers and batches messages before processing (60-second visibility timeout)\n * - **Dead Letter Queue**: Captures failed loading attempts after 5 retries\n * - **Lambda Function**: Python function that processes batches and inserts objects into pgstac\n *\n * ## Data Flow\n *\n * The loader supports two primary data ingestion patterns:\n *\n * ### Direct STAC Object Publishing\n * 1. STAC objects (JSON) are published directly to the SNS topic in message bodies\n * 2. The SQS queue collects messages and batches them (up to {batchSize} objects or 1 minute window)\n * 3. 
The Lambda function receives batches, validates objects, and inserts into pgstac\n *\n * ### S3 Event-Driven Loading\n * 1. An S3 bucket is configured to send notifications to the SNS topic when json files are created\n * 2. STAC objects are uploaded to S3 buckets as JSON/GeoJSON files\n * 3. S3 event notifications are sent to the SNS topic when objects are uploaded\n * 4. The Lambda function receives S3 events in the SQS message batch, fetches objects from S3, and loads into pgstac\n *\n * ## Batching Behavior\n *\n * The SQS-to-Lambda integration uses intelligent batching to optimize performance:\n *\n * - **Batch Size**: Lambda waits to receive up to `batchSize` messages (default: 500)\n * - **Batching Window**: If fewer than `batchSize` messages are available, Lambda\n *   triggers after `maxBatchingWindow` minutes (default: 1 minute)\n * - **Trigger Condition**: Lambda executes when EITHER condition is met first\n * - **Concurrency**: Limited to `maxConcurrency` concurrent executions to prevent database overload\n * - **Partial Failures**: Uses `reportBatchItemFailures` to retry only failed objects\n *\n * This approach balances throughput (larger batches = fewer database connections)\n * with latency (time-based triggers prevent indefinite waiting).\n *\n * ## Error Handling and Dead Letter Queue\n *\n * Failed messages are sent to the dead letter queue after 5 processing attempts.\n * **Important**: This construct provides NO automated handling of dead letter queue\n * messages - monitoring, inspection, and reprocessing of failed objects is the\n * responsibility of the implementing application.\n *\n * Consider implementing:\n * - CloudWatch alarms on dead letter queue depth\n * - Manual or automated reprocessing workflows\n * - Logging and alerting for failed objects\n * - Regular cleanup of old dead letter messages (14-day retention)\n *\n * ## Operational Characteristics\n *\n * - **Scalability**: Lambda scales automatically based on queue depth\n * - 
**Reliability**: Dead letter queue captures failures for debugging\n * - **Efficiency**: Batching optimizes database operations for high throughput\n * - **Security**: Database credentials accessed via AWS Secrets Manager\n * - **Observability**: CloudWatch logs retained for one week\n *\n * ## Prerequisites\n *\n * Before using this construct, ensure:\n * - The pgstac database has collections loaded (objects require existing collection IDs)\n * - Database credentials are stored in AWS Secrets Manager\n * - The pgstac extension is properly installed and configured\n *\n * ## Usage Example\n *\n * ```typescript\n * // Create database first\n * const database = new PgStacDatabase(this, 'Database', {\n *   pgstacVersion: '0.9.5'\n * });\n *\n * // Create Object loader\n * const loader = new StacLoader(this, 'StacLoader', {\n *   pgstacDb: database,\n *   batchSize: 1000,          // Process up to 1000 objects per batch\n *   maxBatchingWindowMinutes: 1, // Wait max 1 minute to fill batch\n *   lambdaTimeoutSeconds: 300     // Allow up to 300 seconds for database operations\n * });\n *\n * // The topic ARN can be used by other services to publish objects\n * new CfnOutput(this, 'LoaderTopicArn', {\n *   value: loader.topic.topicArn\n * });\n * ```\n *\n * ## Direct Object Publishing\n *\n * External services can publish STAC objects directly to the topic:\n *\n * ```bash\n * aws sns publish --topic-arn $STAC_LOAD_TOPIC --message  '{\n *   \"id\": \"example-collection\",\n *   \"type\": \"Collection\",\n *   \"title\": \"Example Collection\",\n *   \"description\": \"An example collection\",\n *   \"license\": \"proprietary\",\n *   \"extent\": {\n *       \"spatial\": {\"bbox\": [[-180, -90, 180, 90]]},\n *       \"temporal\": {\"interval\": [[null, null]]},\n *   },\n *   \"stac_version\": \"1.1.0\",\n * }'\n *\n * aws sns publish --topic-arn $STAC_LOAD_TOPIC --message '{\n *   \"type\": \"Feature\",\n *   \"stac_version\": \"1.0.0\",\n *   \"id\": \"example-item\",\n 
*   \"properties\": {\"datetime\": \"2021-01-01T00:00:00Z\"},\n *   \"geometry\": {\"type\": \"Polygon\", \"coordinates\": [...]},\n *   \"collection\": \"example-collection\"\n * }'\n *\n *\n * ```\n *\n * ## S3 Event Configuration\n *\n * To enable S3 event-driven loading, configure S3 bucket notifications to send\n * events to the SNS topic when STAC objects (.json or .geojson files) are uploaded:\n *\n * ```typescript\n * // Configure S3 bucket to send notifications to the loader topic\n * bucket.addEventNotification(\n *   s3.EventType.OBJECT_CREATED,\n *   new s3n.SnsDestination(loader.topic),\n *   { suffix: '.json' }\n * );\n *\n * bucket.addEventNotification(\n *   s3.EventType.OBJECT_CREATED,\n *   new s3n.SnsDestination(loader.topic),\n *   { suffix: '.geojson' }\n * );\n * ```\n *\n * When STAC objects are uploaded to the configured S3 bucket, the loader will:\n * 1. Receive S3 event notifications via SNS\n * 2. Fetch the STAC JSON from S3\n * 3. Validate and load the objects into the pgstac database\n *\n * ## Monitoring and Troubleshooting\n *\n * - Monitor Lambda logs: `/aws/lambda/{FunctionName}`\n * - **Dead Letter Queue**: Check for failed objects - **no automated handling provided**\n * - Use batch objects failure reporting for partial batch processing\n * - CloudWatch metrics available for queue depth and Lambda performance\n *\n * ### Dead Letter Queue Management\n *\n * Applications must implement their own dead letter queue monitoring:\n *\n * ```typescript\n * // Example: CloudWatch alarm for dead letter queue depth\n * new cloudwatch.Alarm(this, 'DeadLetterAlarm', {\n *   metric: loader.deadLetterQueue.metricApproximateNumberOfVisibleMessages(),\n *   threshold: 1,\n *   evaluationPeriods: 1\n * });\n *\n * // Example: Lambda to reprocess dead letter messages\n * const reprocessFunction = new lambda.Function(this, 'Reprocess', {\n *   // Implementation to fetch and republish failed messages\n * });\n * ```\n *\n */\nexport class StacLoader 
extends Construct {\n  /**\n   * The SNS topic that receives STAC objects and S3 event notifications for loading.\n   *\n   * This topic serves as the entry point for two types of events:\n   * 1. Direct STAC JSON documents published by external services\n   * 2. S3 event notifications when STAC objects are uploaded to configured buckets\n   *\n   * The topic fans out to the SQS queue for batched processing.\n   */\n  public readonly topic: sns.Topic;\n\n  /**\n   * The SQS queue that buffers messages before processing.\n   *\n   * This queue collects both direct STAC objects from SNS and S3 event\n   * notifications, batching them for efficient database operations.\n   * Configured with a visibility timeout that accommodates Lambda\n   * processing time plus buffer.\n   */\n  public readonly queue: sqs.Queue;\n\n  /**\n   * Dead letter queue for failed objects loading attempts.\n   *\n   * Messages that fail processing after 5 attempts are sent here\n   * for inspection and potential replay. Retains messages for 14 days\n   * to allow for debugging and manual intervention.\n   *\n   * **User Responsibility**: This construct provides NO automated monitoring,\n   * alerting, or reprocessing of dead letter queue messages. 
Applications\n   * using this construct must implement their own:\n   * - Dead letter queue depth monitoring and alerting\n   * - Failed message inspection and debugging workflows\n   * - Manual or automated reprocessing mechanisms\n   * - Cleanup procedures for old failed messages\n   */\n  public readonly deadLetterQueue: sqs.Queue;\n\n  /**\n   * The Lambda function that loads STAC objects into the pgstac database.\n   *\n   * This Python function receives batches of messages from SQS and processes\n   * them based on their type:\n   * - Direct STAC objects: Validates and loads directly into pgstac\n   * - S3 events: Fetches STAC JSON from S3, validates, and loads into pgstac\n   *\n   * The function connects to PostgreSQL using credentials from Secrets Manager\n   * and uses pypgstac for efficient database operations.\n   */\n  public readonly lambdaFunction: lambda.Function;\n\n  constructor(scope: Construct, id: string, props: StacLoaderProps) {\n    super(scope, id);\n\n    const timeoutSeconds = props.lambdaTimeoutSeconds ?? 300;\n    const lambdaRuntime = props.lambdaRuntime ?? lambda.Runtime.PYTHON_3_11;\n    const maxConcurrency = props.maxConcurrency ?? 
2;\n\n    // Create dead letter queue\n    this.deadLetterQueue = new sqs.Queue(this, \"DeadLetterQueue\", {\n      retentionPeriod: Duration.days(14),\n    });\n\n    // Create main queue\n    this.queue = new sqs.Queue(this, \"Queue\", {\n      visibilityTimeout: Duration.seconds(timeoutSeconds + 10),\n      encryption: sqs.QueueEncryption.SQS_MANAGED,\n      deadLetterQueue: {\n        maxReceiveCount: 5,\n        queue: this.deadLetterQueue,\n      },\n    });\n\n    // Create SNS topic\n    this.topic = new sns.Topic(this, \"Topic\", {\n      displayName: `${id}-StacLoaderTopic`,\n    });\n\n    // Subscribe the queue to the topic\n    this.topic.addSubscription(\n      new snsSubscriptions.SqsSubscription(this.queue)\n    );\n\n    // Create the lambda function\n    this.lambdaFunction = new lambda.Function(this, \"Function\", {\n      runtime: lambdaRuntime,\n      handler: \"stac_loader.handler.handler\",\n      vpc: props.vpc,\n      vpcSubnets: props.subnetSelection,\n      code: lambda.Code.fromDockerBuild(path.join(__dirname, \"..\"), {\n        file: \"stac-loader/runtime/Dockerfile\",\n        platform: \"linux/amd64\",\n        buildArgs: {\n          PYTHON_VERSION: lambdaRuntime.toString().replace(\"python\", \"\"),\n          PGSTAC_VERSION: props.pgstacDb.pgstacVersion,\n        },\n      }),\n      memorySize: props.memorySize ?? 
1024,\n      timeout: Duration.seconds(timeoutSeconds),\n      reservedConcurrentExecutions: maxConcurrency,\n      logRetention: logs.RetentionDays.ONE_WEEK,\n      environment: {\n        PGSTAC_SECRET_ARN: props.pgstacDb.pgstacSecret.secretArn,\n        ...props.environment,\n      },\n      // overwrites defaults with user-provided configurable properties\n      ...props.lambdaFunctionOptions,\n    });\n\n    // Grant permissions to read the database secret\n    props.pgstacDb.pgstacSecret.grantRead(this.lambdaFunction);\n\n    // Add SQS event source to the lambda\n    this.lambdaFunction.addEventSource(\n      new lambdaEventSources.SqsEventSource(this.queue, {\n        batchSize: props.batchSize ?? 500,\n        maxBatchingWindow: Duration.minutes(\n          props.maxBatchingWindowMinutes ?? 1\n        ),\n        maxConcurrency: maxConcurrency,\n        reportBatchItemFailures: true,\n      })\n    );\n\n    // Create outputs\n    const exportPrefix = Stack.of(this).stackName;\n    new CfnOutput(this, \"TopicArn\", {\n      value: this.topic.topicArn,\n      description: \"ARN of the StacLoader SNS Topic\",\n      exportName: `${exportPrefix}-stac-loader-topic-arn`,\n    });\n\n    new CfnOutput(this, \"QueueUrl\", {\n      value: this.queue.queueUrl,\n      description: \"URL of the StacLoader SQS Queue\",\n      exportName: `${exportPrefix}-stac-loader-queue-url`,\n    });\n\n    new CfnOutput(this, \"DeadLetterQueueUrl\", {\n      value: this.deadLetterQueue.queueUrl,\n      description: \"URL of the StacLoader Dead Letter Queue\",\n      exportName: `${exportPrefix}-stac-loader-deadletter-queue-url`,\n    });\n\n    new CfnOutput(this, \"FunctionName\", {\n      value: this.lambdaFunction.functionName,\n      description: \"Name of the StacLoader Lambda Function\",\n      exportName: `${exportPrefix}-stac-loader-function-name`,\n    });\n  }\n}\n\n/**\n * @deprecated Use StacLoader instead. 
StacItemLoader will be removed in a future version.\n */\nexport class StacItemLoader extends StacLoader {\n  constructor(scope: Construct, id: string, props: StacLoaderProps) {\n    console.warn(\n      `StacItemLoader is deprecated. Please use StacLoader instead. ` +\n        `StacItemLoader will be removed in a future version.`\n    );\n\n    super(scope, id, props);\n  }\n}\n\n// Also create a deprecated interface alias if you had a separate interface\n/**\n * @deprecated Use StacLoaderProps instead. StacItemLoaderProps will be removed in a future version.\n */\nexport interface StacItemLoaderProps extends StacLoaderProps {}\n"]}
|
|
@@ -7,12 +7,12 @@ ENV PYTHONUNBUFFERED=1
|
|
|
7
7
|
|
|
8
8
|
WORKDIR /asset
|
|
9
9
|
|
|
10
|
-
COPY stac-
|
|
11
|
-
COPY stac-
|
|
10
|
+
COPY stac-loader/runtime/pyproject.toml pyproject.toml
|
|
11
|
+
COPY stac-loader/runtime/src/stac_loader/ stac_loader/
|
|
12
12
|
|
|
13
13
|
ARG PGSTAC_VERSION=0.9.6
|
|
14
14
|
RUN uv add --no-sync pypgstac==${PGSTAC_VERSION} && \
|
|
15
15
|
uv export --no-dev --no-editable -o requirements.txt && \
|
|
16
16
|
uv pip install --target /asset -r requirements.txt
|
|
17
17
|
|
|
18
|
-
CMD ["
|
|
18
|
+
CMD ["stac_loader.handler.handler"]
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
|
-
name = "stac-
|
|
2
|
+
name = "stac-loader"
|
|
3
3
|
version = "0.1.0"
|
|
4
|
-
description = "An application for loading STAC items into a pgstac database"
|
|
4
|
+
description = "An application for loading STAC collections and items into a pgstac database"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "hrodmn", email = "henry@developmentseed.org" }
|
|
7
7
|
]
|
|
@@ -92,40 +92,40 @@ def is_s3_event(message_str: str) -> bool:
|
|
|
92
92
|
return "aws:s3" in message_str
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
def
|
|
96
|
-
"""Fetch STAC
|
|
95
|
+
def get_stac_object_from_s3(bucket_name: str, object_key: str) -> Dict[str, Any]:
|
|
96
|
+
"""Fetch STAC JSON from S3."""
|
|
97
97
|
session = boto3.session.Session()
|
|
98
98
|
s3_client = session.client("s3")
|
|
99
99
|
|
|
100
100
|
try:
|
|
101
|
-
logger.debug(f"Fetching STAC
|
|
101
|
+
logger.debug(f"Fetching STAC object from s3://{bucket_name}/{object_key}")
|
|
102
102
|
response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
|
|
103
103
|
content = response["Body"].read()
|
|
104
104
|
|
|
105
105
|
try:
|
|
106
|
-
|
|
106
|
+
stac_json = content.decode("utf-8")
|
|
107
107
|
except UnicodeDecodeError as e:
|
|
108
108
|
logger.error(
|
|
109
109
|
f"Failed to decode S3 object as UTF-8: s3://{bucket_name}/{object_key}"
|
|
110
110
|
)
|
|
111
111
|
raise ValueError("S3 object is not valid UTF-8 text") from e
|
|
112
112
|
|
|
113
|
-
|
|
113
|
+
stac_data = json.loads(stac_json)
|
|
114
114
|
logger.debug(
|
|
115
|
-
f"Successfully parsed STAC
|
|
115
|
+
f"Successfully parsed STAC metadata from S3: {stac_data.get('id', 'unknown')}"
|
|
116
116
|
)
|
|
117
117
|
|
|
118
|
-
return
|
|
118
|
+
return stac_data
|
|
119
119
|
|
|
120
120
|
except Exception as e:
|
|
121
121
|
logger.error(
|
|
122
|
-
f"Failed to fetch STAC
|
|
122
|
+
f"Failed to fetch STAC metadata from s3://{bucket_name}/{object_key}: {e}"
|
|
123
123
|
)
|
|
124
124
|
raise
|
|
125
125
|
|
|
126
126
|
|
|
127
127
|
def process_s3_event(message_str: str) -> Dict[str, Any]:
|
|
128
|
-
"""Process an S3 event notification and return STAC
|
|
128
|
+
"""Process an S3 event notification and return STAC metadata."""
|
|
129
129
|
try:
|
|
130
130
|
message_data = json.loads(message_str)
|
|
131
131
|
records: List[Dict[str, Any]] = message_data.get("Records", [])
|
|
@@ -138,15 +138,15 @@ def process_s3_event(message_str: str) -> Dict[str, Any]:
|
|
|
138
138
|
bucket_name = s3_data["bucket"]["name"]
|
|
139
139
|
object_key = s3_data["object"]["key"]
|
|
140
140
|
|
|
141
|
-
# Validate that this looks like a STAC
|
|
141
|
+
# Validate that this looks like a STAC file
|
|
142
142
|
if not object_key.endswith((".json", ".geojson")):
|
|
143
143
|
raise ValueError(
|
|
144
|
-
f"S3 object key does not appear to be a STAC
|
|
144
|
+
f"S3 object key does not appear to be a STAC document: {object_key}"
|
|
145
145
|
)
|
|
146
146
|
|
|
147
|
-
|
|
147
|
+
stac_data = get_stac_object_from_s3(bucket_name, object_key)
|
|
148
148
|
|
|
149
|
-
return
|
|
149
|
+
return stac_data
|
|
150
150
|
|
|
151
151
|
except KeyError as e:
|
|
152
152
|
logger.error(f"S3 event missing required field: {e}")
|
|
@@ -169,8 +169,10 @@ def handler(
|
|
|
169
169
|
)
|
|
170
170
|
pgstac_dsn = get_pgstac_dsn()
|
|
171
171
|
|
|
172
|
-
|
|
172
|
+
batch_failures: List[BatchItemFailure] = []
|
|
173
173
|
|
|
174
|
+
collections: List[Dict[str, Any]] = []
|
|
175
|
+
collection_message_ids: List[str] = []
|
|
174
176
|
items_by_collection: DefaultDict[str, List[Dict[str, Any]]] = defaultdict(list)
|
|
175
177
|
message_ids_by_collection: DefaultDict[str, List[str]] = defaultdict(list)
|
|
176
178
|
|
|
@@ -194,21 +196,47 @@ def handler(
|
|
|
194
196
|
else:
|
|
195
197
|
message_data = json.loads(message_str)
|
|
196
198
|
|
|
197
|
-
|
|
199
|
+
if message_data["type"] == "Feature":
|
|
200
|
+
item = Item(**message_data)
|
|
198
201
|
|
|
199
|
-
|
|
200
|
-
|
|
202
|
+
if not item.collection:
|
|
203
|
+
raise KeyError(f"item {item.id} is missing a collection id!")
|
|
204
|
+
|
|
205
|
+
items_by_collection[item.collection].append(item.model_dump(mode="json"))
|
|
206
|
+
message_ids_by_collection[item.collection].append(message_id)
|
|
207
|
+
elif message_data["type"] == "Collection":
|
|
208
|
+
collection = Collection(**message_data)
|
|
209
|
+
collections.append(collection.model_dump(mode="json"))
|
|
210
|
+
collection_message_ids.append(message_id)
|
|
211
|
+
else:
|
|
212
|
+
raise ValueError(
|
|
213
|
+
f"expected either a 'Feature' or a 'Collection', received a {message_data['type']}"
|
|
214
|
+
)
|
|
201
215
|
|
|
202
|
-
items_by_collection[item.collection].append(item.model_dump(mode="json"))
|
|
203
|
-
message_ids_by_collection[item.collection].append(message_id)
|
|
204
216
|
logger.debug(f"[{message_id}] Successfully processed.")
|
|
205
217
|
|
|
206
218
|
except (ValueError, KeyError, ValidationError, json.JSONDecodeError) as e:
|
|
207
219
|
logger.error(f"[{message_id}] Failed with error: {e}", extra=record)
|
|
208
|
-
|
|
220
|
+
batch_failures.append({"itemIdentifier": message_id})
|
|
209
221
|
except Exception as e:
|
|
210
222
|
logger.error(f"[{message_id}] Unexpected error: {e}", extra=record)
|
|
211
|
-
|
|
223
|
+
batch_failures.append({"itemIdentifier": message_id})
|
|
224
|
+
|
|
225
|
+
if collections:
|
|
226
|
+
try:
|
|
227
|
+
with PgstacDB(dsn=pgstac_dsn) as db:
|
|
228
|
+
loader = Loader(db=db)
|
|
229
|
+
logger.info("loading collections into database.")
|
|
230
|
+
loader.load_collections(
|
|
231
|
+
file=collections, # type: ignore
|
|
232
|
+
insert_mode=Methods.upsert,
|
|
233
|
+
)
|
|
234
|
+
logger.info(f"successfully loaded {len(collections)} collections.")
|
|
235
|
+
except Exception as e:
|
|
236
|
+
logger.error(f"failed to load collections: {str(e)}")
|
|
237
|
+
batch_failures.extend(
|
|
238
|
+
[{"itemIdentifier": message_id} for message_id in collection_message_ids]
|
|
239
|
+
)
|
|
212
240
|
|
|
213
241
|
for collection_id, items in items_by_collection.items():
|
|
214
242
|
try:
|
|
@@ -248,21 +276,21 @@ def handler(
|
|
|
248
276
|
except Exception as e:
|
|
249
277
|
logger.error(f"[{collection_id}] failed to load items: {str(e)}")
|
|
250
278
|
|
|
251
|
-
|
|
279
|
+
batch_failures.extend(
|
|
252
280
|
[
|
|
253
281
|
{"itemIdentifier": message_id}
|
|
254
282
|
for message_id in message_ids_by_collection[collection_id]
|
|
255
283
|
]
|
|
256
284
|
)
|
|
257
285
|
|
|
258
|
-
if
|
|
286
|
+
if batch_failures:
|
|
259
287
|
logger.warning(
|
|
260
|
-
f"Finished processing batch. {len(
|
|
288
|
+
f"Finished processing batch. {len(batch_failures)} failure(s) reported."
|
|
261
289
|
)
|
|
262
290
|
logger.info(
|
|
263
|
-
f"Returning failed item identifiers: {[f['itemIdentifier'] for f in
|
|
291
|
+
f"Returning failed item identifiers: {[f['itemIdentifier'] for f in batch_failures]}"
|
|
264
292
|
)
|
|
265
|
-
return {"batchItemFailures":
|
|
293
|
+
return {"batchItemFailures": batch_failures}
|
|
266
294
|
else:
|
|
267
295
|
logger.info("Finished processing batch. All records successful.")
|
|
268
296
|
return None
|
|
@@ -93,7 +93,7 @@ export interface StactoolsItemGeneratorProps {
|
|
|
93
93
|
/**
|
|
94
94
|
* ARN of the SNS topic to publish generated items to.
|
|
95
95
|
*
|
|
96
|
-
* This is typically the topic from a
|
|
96
|
+
* This is typically the topic from a StacLoader construct.
|
|
97
97
|
* Generated STAC items will be published here for downstream
|
|
98
98
|
* processing and database insertion.
|
|
99
99
|
*/
|
|
@@ -158,7 +158,7 @@ export interface StactoolsItemGeneratorProps {
|
|
|
158
158
|
*
|
|
159
159
|
* ```typescript
|
|
160
160
|
* // Create item loader first (or get existing topic ARN)
|
|
161
|
-
* const loader = new
|
|
161
|
+
* const loader = new StacLoader(this, 'ItemLoader', {
|
|
162
162
|
* pgstacDb: database
|
|
163
163
|
* });
|
|
164
164
|
*
|