eoapi-cdk 8.1.1 → 8.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/.jsii +698 -26
  2. package/lib/bastion-host/index.js +1 -1
  3. package/lib/database/index.d.ts +1 -0
  4. package/lib/database/index.js +5 -5
  5. package/lib/index.d.ts +2 -0
  6. package/lib/index.js +3 -1
  7. package/lib/ingestor-api/index.js +1 -1
  8. package/lib/stac-api/index.js +1 -1
  9. package/lib/stac-browser/index.js +1 -1
  10. package/lib/stac-item-loader/index.d.ts +337 -0
  11. package/lib/stac-item-loader/index.js +255 -0
  12. package/lib/stac-item-loader/runtime/Dockerfile +18 -0
  13. package/lib/stac-item-loader/runtime/pyproject.toml +17 -0
  14. package/lib/stac-item-loader/runtime/src/stac_item_loader/handler.py +241 -0
  15. package/lib/stactools-item-generator/index.d.ts +258 -0
  16. package/lib/stactools-item-generator/index.js +208 -0
  17. package/lib/stactools-item-generator/runtime/Dockerfile +20 -0
  18. package/lib/stactools-item-generator/runtime/pyproject.toml +16 -0
  19. package/lib/stactools-item-generator/runtime/src/stactools_item_generator/__init__.py +2 -0
  20. package/lib/stactools-item-generator/runtime/src/stactools_item_generator/handler.py +176 -0
  21. package/lib/stactools-item-generator/runtime/src/stactools_item_generator/item.py +77 -0
  22. package/lib/tipg-api/index.js +1 -1
  23. package/lib/titiler-pgstac-api/index.js +1 -1
  24. package/package.json +1 -1
  25. package/pyproject.toml +45 -0
  26. package/uv.lock +1065 -0
  27. package/.devcontainer/devcontainer.json +0 -4
  28. package/.github/pull_request_template.md +0 -4
  29. package/.github/workflows/build.yaml +0 -73
  30. package/.github/workflows/build_and_release.yaml +0 -13
  31. package/.github/workflows/conventional-pr.yaml +0 -26
  32. package/.github/workflows/deploy.yaml +0 -84
  33. package/.github/workflows/distribute.yaml +0 -46
  34. package/.github/workflows/docs.yaml +0 -26
  35. package/.github/workflows/lint.yaml +0 -26
  36. package/.github/workflows/tox.yaml +0 -26
  37. package/.nvmrc +0 -1
  38. package/.pre-commit-config.yaml +0 -23
  39. package/CHANGELOG.md +0 -471
  40. package/diagrams/bastion_diagram.excalidraw +0 -1416
  41. package/diagrams/bastion_diagram.png +0 -0
  42. package/diagrams/ingestor_diagram.excalidraw +0 -2274
  43. package/diagrams/ingestor_diagram.png +0 -0
  44. package/integration_tests/cdk/README.md +0 -55
  45. package/integration_tests/cdk/app.py +0 -186
  46. package/integration_tests/cdk/cdk.json +0 -32
  47. package/integration_tests/cdk/config.py +0 -52
  48. package/integration_tests/cdk/package-lock.json +0 -42
  49. package/integration_tests/cdk/package.json +0 -7
  50. package/integration_tests/cdk/requirements.txt +0 -7
  51. package/lib/database/lambda/package-lock.json +0 -1324
  52. package/lib/ingestor-api/runtime/tests/conftest.py +0 -270
  53. package/lib/ingestor-api/runtime/tests/test_collection.py +0 -87
  54. package/lib/ingestor-api/runtime/tests/test_collection_endpoint.py +0 -41
  55. package/lib/ingestor-api/runtime/tests/test_ingestor.py +0 -60
  56. package/lib/ingestor-api/runtime/tests/test_registration.py +0 -207
  57. package/lib/ingestor-api/runtime/tests/test_utils.py +0 -35
  58. package/lib/ingestor-api/runtime/tests/test_validators.py +0 -164
  59. package/ruff.toml +0 -23
  60. package/tox.ini +0 -16
  61. package/tsconfig.tsbuildinfo +0 -1
  62. /package/lib/{ingestor-api/runtime/tests → stac-item-loader/runtime/src/stac_item_loader}/__init__.py +0 -0
@@ -0,0 +1,337 @@
1
+ import { aws_ec2 as ec2, aws_lambda as lambda, aws_sqs as sqs, aws_sns as sns } from "aws-cdk-lib";
2
+ import { Construct } from "constructs";
3
+ import { PgStacDatabase } from "../database";
4
+ import { CustomLambdaFunctionProps } from "../utils";
5
+ /**
6
+ * Configuration properties for the StacItemLoader construct.
7
+ *
8
+ * The StacItemLoader is part of a two-phase serverless STAC ingestion pipeline
9
+ * that loads STAC items into a pgstac database. This construct creates
10
+ * the infrastructure for receiving STAC items from multiple sources:
11
+ * 1. SNS messages containing STAC metadata (direct ingestion)
12
+ * 2. S3 event notifications for STAC items uploaded to S3 buckets
13
+ *
14
+ * Items from both sources are batched and inserted into PostgreSQL with the pgstac extension.
15
+ *
16
+ * @example
17
+ * const loader = new StacItemLoader(this, 'ItemLoader', {
18
+ * pgstacDb: database,
19
+ * batchSize: 1000,
20
+ * maxBatchingWindowMinutes: 1,
21
+ * lambdaTimeoutSeconds: 300
22
+ * });
23
+ */
24
+ export interface StacItemLoaderProps {
25
+ /**
26
+ * The PgSTAC database instance to load items into.
27
+ *
28
+ * This database must have the pgstac extension installed and be properly
29
+ * configured with collections before items can be loaded. The loader will
30
+ * use AWS Secrets Manager to securely access database credentials.
31
+ */
32
+ readonly pgstacDb: PgStacDatabase;
33
+ /**
34
+ * VPC into which the lambda should be deployed.
35
+ */
36
+ readonly vpc?: ec2.IVpc;
37
+ /**
38
+ * Subnet selection (passed to the lambda as `vpcSubnets`) into which the lambda should be deployed.
39
+ */
40
+ readonly subnetSelection?: ec2.SubnetSelection;
41
+ /**
42
+ * The lambda runtime to use for the item loading function.
43
+ *
44
+ * The function is implemented in Python and uses pypgstac for database
45
+ * operations. Ensure the runtime version is compatible with the pgstac
46
+ * version specified in the database configuration.
47
+ *
48
+ * @default lambda.Runtime.PYTHON_3_11
49
+ */
50
+ readonly lambdaRuntime?: lambda.Runtime;
51
+ /**
52
+ * The timeout for the item load lambda in seconds.
53
+ *
54
+ * This should accommodate the time needed to process up to `batchSize`
55
+ * items and perform database insertions. The SQS visibility timeout
56
+ * will be set to this value plus 10 seconds.
57
+ *
58
+ * @default 300
59
+ */
60
+ readonly lambdaTimeoutSeconds?: number;
61
+ /**
62
+ * Memory size for the lambda function in MB.
63
+ *
64
+ * Higher memory allocation may improve performance when processing
65
+ * large batches of STAC items, especially for memory-intensive
66
+ * database operations.
67
+ *
68
+ * @default 1024
69
+ */
70
+ readonly memorySize?: number;
71
+ /**
72
+ * SQS batch size for lambda event source.
73
+ *
74
+ * This determines the maximum number of STAC items that will be
75
+ * processed together in a single lambda invocation. Larger batch
76
+ * sizes improve database insertion efficiency but require more
77
+ * memory and longer processing time.
78
+ *
79
+ * **Batching Behavior**: The SQS event source will wait to accumulate up to this many
80
+ * messages before triggering the Lambda, OR until the `maxBatchingWindowMinutes`
81
+ * timeout is reached, whichever comes first. This creates an efficient
82
+ * balance between throughput and latency.
83
+ *
84
+ * @default 500
85
+ */
86
+ readonly batchSize?: number;
87
+ /**
88
+ * Maximum batching window in minutes.
89
+ *
90
+ * Even if the batch size isn't reached, the lambda will be triggered
91
+ * after this time period to ensure timely processing of items.
92
+ * This prevents items from waiting indefinitely in low-volume scenarios.
93
+ *
94
+ * **Important**: This timeout works in conjunction with `batchSize` - the SQS
95
+ * event source will trigger the Lambda when EITHER the batch size is reached OR this
96
+ * time window expires, ensuring items are processed in a timely manner
97
+ * regardless of volume.
98
+ *
99
+ * @default 1
100
+ */
101
+ readonly maxBatchingWindowMinutes?: number;
102
+ /**
103
+ * Maximum concurrent executions for the StacItemLoader Lambda function.
104
+ *
105
+ * This limit is applied both as the Lambda function's reserved concurrency and as
106
+ * the SQS event source's maximum concurrency, so it caps how many batches are processed at once.
107
+ *
108
+ * @default 2
109
+ */
110
+ readonly maxConcurrency?: number;
111
+ /**
112
+ * Additional environment variables for the lambda function.
113
+ *
114
+ * These will be merged with the default environment variables including
115
+ * PGSTAC_SECRET_ARN; keys supplied here take precedence over the defaults.
116
+ */
117
+ readonly environment?: {
118
+ [key: string]: string;
119
+ };
120
+ /**
121
+ * Can be used to override the default lambda function properties (applied last, on top of all other settings).
122
+ *
123
+ * @default - defined in the construct.
124
+ */
125
+ readonly lambdaFunctionOptions?: CustomLambdaFunctionProps;
126
+ }
127
+ /**
128
+ * AWS CDK Construct for STAC Item Loading Infrastructure
129
+ *
130
+ * The StacItemLoader creates a serverless, event-driven system for loading
131
+ * STAC (SpatioTemporal Asset Catalog) items into a PostgreSQL database with
132
+ * the pgstac extension. This construct supports multiple ingestion pathways
133
+ * for flexible STAC item loading.
134
+ *
135
+ * ## Architecture Overview
136
+ *
137
+ * This construct creates the following AWS resources:
138
+ * - **SNS Topic**: Entry point for STAC items and S3 event notifications
139
+ * - **SQS Queue**: Buffers and batches messages before processing (visibility timeout is the Lambda timeout plus 10 seconds)
140
+ * - **Dead Letter Queue**: Captures failed loading attempts after 5 retries
141
+ * - **Lambda Function**: Python function that processes batches and inserts items into pgstac
142
+ *
143
+ * ## Data Flow
144
+ *
145
+ * The loader supports two primary data ingestion patterns:
146
+ *
147
+ * ### Direct STAC Item Publishing
148
+ * 1. STAC items (JSON) are published directly to the SNS topic in message bodies
149
+ * 2. The SQS queue collects messages and batches them (up to `batchSize` items or until the `maxBatchingWindowMinutes` window elapses, 1 minute by default)
150
+ * 3. The Lambda function receives batches, validates items, and inserts into pgstac
151
+ *
152
+ * ### S3 Event-Driven Loading
153
+ * 1. An S3 bucket is configured to send notifications to the SNS topic when json files are created
154
+ * 2. STAC items are uploaded to S3 buckets as JSON/GeoJSON files
155
+ * 3. S3 event notifications are sent to the SNS topic when items are uploaded
156
+ * 4. The Lambda function receives S3 events in the SQS message batch, fetches items from S3, and loads into pgstac
157
+ *
158
+ * ## Batching Behavior
159
+ *
160
+ * The SQS-to-Lambda integration uses intelligent batching to optimize performance:
161
+ *
162
+ * - **Batch Size**: Lambda waits to receive up to `batchSize` messages (default: 500)
163
+ * - **Batching Window**: If fewer than `batchSize` messages are available, Lambda
164
+ * triggers after `maxBatchingWindowMinutes` minutes (default: 1 minute)
165
+ * - **Trigger Condition**: Lambda executes when EITHER condition is met first
166
+ * - **Concurrency**: Limited to `maxConcurrency` concurrent executions to prevent database overload
167
+ * - **Partial Failures**: Uses `reportBatchItemFailures` to retry only failed items
168
+ *
169
+ * This approach balances throughput (larger batches = fewer database connections)
170
+ * with latency (time-based triggers prevent indefinite waiting).
171
+ *
172
+ * ## Error Handling and Dead Letter Queue
173
+ *
174
+ * Failed messages are sent to the dead letter queue after 5 processing attempts.
175
+ * **Important**: This construct provides NO automated handling of dead letter queue
176
+ * messages - monitoring, inspection, and reprocessing of failed items is the
177
+ * responsibility of the implementing application.
178
+ *
179
+ * Consider implementing:
180
+ * - CloudWatch alarms on dead letter queue depth
181
+ * - Manual or automated reprocessing workflows
182
+ * - Logging and alerting for failed items
183
+ * - Regular cleanup of old dead letter messages (14-day retention)
184
+ *
185
+ * ## Operational Characteristics
186
+ *
187
+ * - **Scalability**: Lambda scales automatically based on queue depth
188
+ * - **Reliability**: Dead letter queue captures failures for debugging
189
+ * - **Efficiency**: Batching optimizes database operations for high throughput
190
+ * - **Security**: Database credentials accessed via AWS Secrets Manager
191
+ * - **Observability**: CloudWatch logs retained for one week
192
+ *
193
+ * ## Prerequisites
194
+ *
195
+ * Before using this construct, ensure:
196
+ * - The pgstac database has collections loaded (items require existing collection IDs)
197
+ * - Database credentials are stored in AWS Secrets Manager
198
+ * - The pgstac extension is properly installed and configured
199
+ *
200
+ * ## Usage Example
201
+ *
202
+ * ```typescript
203
+ * // Create database first
204
+ * const database = new PgStacDatabase(this, 'Database', {
205
+ * pgstacVersion: '0.9.5'
206
+ * });
207
+ *
208
+ * // Create item loader
209
+ * const loader = new StacItemLoader(this, 'ItemLoader', {
210
+ * pgstacDb: database,
211
+ * batchSize: 1000, // Process up to 1000 items per batch
212
+ * maxBatchingWindowMinutes: 1, // Wait max 1 minute to fill batch
213
+ * lambdaTimeoutSeconds: 300 // Allow up to 300 seconds for database operations
214
+ * });
215
+ *
216
+ * // The topic ARN can be used by other services to publish items
217
+ * new CfnOutput(this, 'LoaderTopicArn', {
218
+ * value: loader.topic.topicArn
219
+ * });
220
+ * ```
221
+ *
222
+ * ## Direct Item Publishing
223
+ *
224
+ * External services can publish STAC items directly to the topic:
225
+ *
226
+ * ```bash
227
+ * aws sns publish --topic-arn $ITEM_LOAD_TOPIC --message '{
228
+ * "type": "Feature",
229
+ * "stac_version": "1.0.0",
230
+ * "id": "example-item",
231
+ * "properties": {"datetime": "2021-01-01T00:00:00Z"},
232
+ * "geometry": {"type": "Polygon", "coordinates": [...]},
233
+ * "collection": "example-collection"
234
+ * }'
235
+ * ```
236
+ *
237
+ * ## S3 Event Configuration
238
+ *
239
+ * To enable S3 event-driven loading, configure S3 bucket notifications to send
240
+ * events to the SNS topic when STAC items (.json or .geojson files) are uploaded:
241
+ *
242
+ * ```typescript
243
+ * // Configure S3 bucket to send notifications to the loader topic
244
+ * bucket.addEventNotification(
245
+ * s3.EventType.OBJECT_CREATED,
246
+ * new s3n.SnsDestination(loader.topic),
247
+ * { suffix: '.json' }
248
+ * );
249
+ *
250
+ * bucket.addEventNotification(
251
+ * s3.EventType.OBJECT_CREATED,
252
+ * new s3n.SnsDestination(loader.topic),
253
+ * { suffix: '.geojson' }
254
+ * );
255
+ * ```
256
+ *
257
+ * When STAC items are uploaded to the configured S3 bucket, the loader will:
258
+ * 1. Receive S3 event notifications via SNS
259
+ * 2. Fetch the STAC item JSON from S3
260
+ * 3. Validate and load the item into the pgstac database
261
+ *
262
+ * ## Monitoring and Troubleshooting
263
+ *
264
+ * - Monitor Lambda logs: `/aws/lambda/{FunctionName}`
265
+ * - **Dead Letter Queue**: Check for failed items - **no automated handling provided**
266
+ * - Use batch item failure reporting for partial batch processing
267
+ * - CloudWatch metrics available for queue depth and Lambda performance
268
+ *
269
+ * ### Dead Letter Queue Management
270
+ *
271
+ * Applications must implement their own dead letter queue monitoring:
272
+ *
273
+ * ```typescript
274
+ * // Example: CloudWatch alarm for dead letter queue depth
275
+ * new cloudwatch.Alarm(this, 'DeadLetterAlarm', {
276
+ * metric: loader.deadLetterQueue.metricApproximateNumberOfVisibleMessages(),
277
+ * threshold: 1,
278
+ * evaluationPeriods: 1
279
+ * });
280
+ *
281
+ * // Example: Lambda to reprocess dead letter messages
282
+ * const reprocessFunction = new lambda.Function(this, 'Reprocess', {
283
+ * // Implementation to fetch and republish failed messages
284
+ * });
285
+ * ```
286
+ *
287
+ */
288
+ export declare class StacItemLoader extends Construct {
289
+ /**
290
+ * The SNS topic that receives STAC items and S3 event notifications for loading.
291
+ *
292
+ * This topic serves as the entry point for two types of events:
293
+ * 1. Direct STAC item JSON documents published by external services
294
+ * 2. S3 event notifications when STAC items are uploaded to configured buckets
295
+ *
296
+ * The topic is subscribed to by the SQS queue for batched processing.
297
+ */
298
+ readonly topic: sns.Topic;
299
+ /**
300
+ * The SQS queue that buffers messages before processing.
301
+ *
302
+ * This queue collects both direct STAC items from SNS and S3 event
303
+ * notifications, batching them for efficient database operations.
304
+ * Configured with a visibility timeout equal to the Lambda
305
+ * timeout plus a 10-second buffer, and with SQS-managed encryption.
306
+ */
307
+ readonly queue: sqs.Queue;
308
+ /**
309
+ * Dead letter queue for failed item loading attempts.
310
+ *
311
+ * Messages that have been received 5 times without succeeding are sent here
312
+ * for inspection and potential replay. Retains messages for 14 days
313
+ * to allow for debugging and manual intervention.
314
+ *
315
+ * **User Responsibility**: This construct provides NO automated monitoring,
316
+ * alerting, or reprocessing of dead letter queue messages. Applications
317
+ * using this construct must implement their own:
318
+ * - Dead letter queue depth monitoring and alerting
319
+ * - Failed message inspection and debugging workflows
320
+ * - Manual or automated reprocessing mechanisms
321
+ * - Cleanup procedures for old failed messages
322
+ */
323
+ readonly deadLetterQueue: sqs.Queue;
324
+ /**
325
+ * The Lambda function that loads STAC items into the pgstac database.
326
+ *
327
+ * This Python function receives batches of messages from SQS and processes
328
+ * them based on their type:
329
+ * - Direct STAC items: Validates and loads directly into pgstac
330
+ * - S3 events: Fetches STAC items from S3, validates, and loads into pgstac
331
+ *
332
+ * The function is granted read access to the database secret in Secrets Manager
333
+ * (exposed to it as PGSTAC_SECRET_ARN) and uses pypgstac for efficient database operations.
334
+ */
335
+ readonly lambdaFunction: lambda.Function;
336
+ constructor(scope: Construct, id: string, props: StacItemLoaderProps);
337
+ }
@@ -0,0 +1,255 @@
1
+ "use strict";
2
+ var _a;
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.StacItemLoader = void 0;
5
+ const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
6
+ const aws_cdk_lib_1 = require("aws-cdk-lib");
7
+ const constructs_1 = require("constructs");
8
+ const path = require("path");
9
+ /**
10
+ * AWS CDK Construct for STAC Item Loading Infrastructure
11
+ *
12
+ * The StacItemLoader creates a serverless, event-driven system for loading
13
+ * STAC (SpatioTemporal Asset Catalog) items into a PostgreSQL database with
14
+ * the pgstac extension. This construct supports multiple ingestion pathways
15
+ * for flexible STAC item loading.
16
+ *
17
+ * ## Architecture Overview
18
+ *
19
+ * This construct creates the following AWS resources:
20
+ * - **SNS Topic**: Entry point for STAC items and S3 event notifications
21
+ * - **SQS Queue**: Buffers and batches messages before processing (visibility timeout is the Lambda timeout plus 10 seconds)
22
+ * - **Dead Letter Queue**: Captures failed loading attempts after 5 retries
23
+ * - **Lambda Function**: Python function that processes batches and inserts items into pgstac
24
+ *
25
+ * ## Data Flow
26
+ *
27
+ * The loader supports two primary data ingestion patterns:
28
+ *
29
+ * ### Direct STAC Item Publishing
30
+ * 1. STAC items (JSON) are published directly to the SNS topic in message bodies
31
+ * 2. The SQS queue collects messages and batches them (up to `batchSize` items or until the `maxBatchingWindowMinutes` window elapses, 1 minute by default)
32
+ * 3. The Lambda function receives batches, validates items, and inserts into pgstac
33
+ *
34
+ * ### S3 Event-Driven Loading
35
+ * 1. An S3 bucket is configured to send notifications to the SNS topic when json files are created
36
+ * 2. STAC items are uploaded to S3 buckets as JSON/GeoJSON files
37
+ * 3. S3 event notifications are sent to the SNS topic when items are uploaded
38
+ * 4. The Lambda function receives S3 events in the SQS message batch, fetches items from S3, and loads into pgstac
39
+ *
40
+ * ## Batching Behavior
41
+ *
42
+ * The SQS-to-Lambda integration uses intelligent batching to optimize performance:
43
+ *
44
+ * - **Batch Size**: Lambda waits to receive up to `batchSize` messages (default: 500)
45
+ * - **Batching Window**: If fewer than `batchSize` messages are available, Lambda
46
+ * triggers after `maxBatchingWindowMinutes` minutes (default: 1 minute)
47
+ * - **Trigger Condition**: Lambda executes when EITHER condition is met first
48
+ * - **Concurrency**: Limited to `maxConcurrency` concurrent executions to prevent database overload
49
+ * - **Partial Failures**: Uses `reportBatchItemFailures` to retry only failed items
50
+ *
51
+ * This approach balances throughput (larger batches = fewer database connections)
52
+ * with latency (time-based triggers prevent indefinite waiting).
53
+ *
54
+ * ## Error Handling and Dead Letter Queue
55
+ *
56
+ * Failed messages are sent to the dead letter queue after 5 processing attempts.
57
+ * **Important**: This construct provides NO automated handling of dead letter queue
58
+ * messages - monitoring, inspection, and reprocessing of failed items is the
59
+ * responsibility of the implementing application.
60
+ *
61
+ * Consider implementing:
62
+ * - CloudWatch alarms on dead letter queue depth
63
+ * - Manual or automated reprocessing workflows
64
+ * - Logging and alerting for failed items
65
+ * - Regular cleanup of old dead letter messages (14-day retention)
66
+ *
67
+ * ## Operational Characteristics
68
+ *
69
+ * - **Scalability**: Lambda scales automatically based on queue depth
70
+ * - **Reliability**: Dead letter queue captures failures for debugging
71
+ * - **Efficiency**: Batching optimizes database operations for high throughput
72
+ * - **Security**: Database credentials accessed via AWS Secrets Manager
73
+ * - **Observability**: CloudWatch logs retained for one week
74
+ *
75
+ * ## Prerequisites
76
+ *
77
+ * Before using this construct, ensure:
78
+ * - The pgstac database has collections loaded (items require existing collection IDs)
79
+ * - Database credentials are stored in AWS Secrets Manager
80
+ * - The pgstac extension is properly installed and configured
81
+ *
82
+ * ## Usage Example
83
+ *
84
+ * ```typescript
85
+ * // Create database first
86
+ * const database = new PgStacDatabase(this, 'Database', {
87
+ * pgstacVersion: '0.9.5'
88
+ * });
89
+ *
90
+ * // Create item loader
91
+ * const loader = new StacItemLoader(this, 'ItemLoader', {
92
+ * pgstacDb: database,
93
+ * batchSize: 1000, // Process up to 1000 items per batch
94
+ * maxBatchingWindowMinutes: 1, // Wait max 1 minute to fill batch
95
+ * lambdaTimeoutSeconds: 300 // Allow up to 300 seconds for database operations
96
+ * });
97
+ *
98
+ * // The topic ARN can be used by other services to publish items
99
+ * new CfnOutput(this, 'LoaderTopicArn', {
100
+ * value: loader.topic.topicArn
101
+ * });
102
+ * ```
103
+ *
104
+ * ## Direct Item Publishing
105
+ *
106
+ * External services can publish STAC items directly to the topic:
107
+ *
108
+ * ```bash
109
+ * aws sns publish --topic-arn $ITEM_LOAD_TOPIC --message '{
110
+ * "type": "Feature",
111
+ * "stac_version": "1.0.0",
112
+ * "id": "example-item",
113
+ * "properties": {"datetime": "2021-01-01T00:00:00Z"},
114
+ * "geometry": {"type": "Polygon", "coordinates": [...]},
115
+ * "collection": "example-collection"
116
+ * }'
117
+ * ```
118
+ *
119
+ * ## S3 Event Configuration
120
+ *
121
+ * To enable S3 event-driven loading, configure S3 bucket notifications to send
122
+ * events to the SNS topic when STAC items (.json or .geojson files) are uploaded:
123
+ *
124
+ * ```typescript
125
+ * // Configure S3 bucket to send notifications to the loader topic
126
+ * bucket.addEventNotification(
127
+ * s3.EventType.OBJECT_CREATED,
128
+ * new s3n.SnsDestination(loader.topic),
129
+ * { suffix: '.json' }
130
+ * );
131
+ *
132
+ * bucket.addEventNotification(
133
+ * s3.EventType.OBJECT_CREATED,
134
+ * new s3n.SnsDestination(loader.topic),
135
+ * { suffix: '.geojson' }
136
+ * );
137
+ * ```
138
+ *
139
+ * When STAC items are uploaded to the configured S3 bucket, the loader will:
140
+ * 1. Receive S3 event notifications via SNS
141
+ * 2. Fetch the STAC item JSON from S3
142
+ * 3. Validate and load the item into the pgstac database
143
+ *
144
+ * ## Monitoring and Troubleshooting
145
+ *
146
+ * - Monitor Lambda logs: `/aws/lambda/{FunctionName}`
147
+ * - **Dead Letter Queue**: Check for failed items - **no automated handling provided**
148
+ * - Use batch item failure reporting for partial batch processing
149
+ * - CloudWatch metrics available for queue depth and Lambda performance
150
+ *
151
+ * ### Dead Letter Queue Management
152
+ *
153
+ * Applications must implement their own dead letter queue monitoring:
154
+ *
155
+ * ```typescript
156
+ * // Example: CloudWatch alarm for dead letter queue depth
157
+ * new cloudwatch.Alarm(this, 'DeadLetterAlarm', {
158
+ * metric: loader.deadLetterQueue.metricApproximateNumberOfVisibleMessages(),
159
+ * threshold: 1,
160
+ * evaluationPeriods: 1
161
+ * });
162
+ *
163
+ * // Example: Lambda to reprocess dead letter messages
164
+ * const reprocessFunction = new lambda.Function(this, 'Reprocess', {
165
+ * // Implementation to fetch and republish failed messages
166
+ * });
167
+ * ```
168
+ *
169
+ */
170
+ class StacItemLoader extends constructs_1.Construct {
171
+ constructor(scope, id, props) {
172
+ super(scope, id);
173
+ const timeoutSeconds = props.lambdaTimeoutSeconds ?? 300;
174
+ const lambdaRuntime = props.lambdaRuntime ?? aws_cdk_lib_1.aws_lambda.Runtime.PYTHON_3_11;
175
+ const maxConcurrency = props.maxConcurrency ?? 2;
176
+ // Dead letter queue: receives messages the main queue gives up on; retained for 14 days
177
+ this.deadLetterQueue = new aws_cdk_lib_1.aws_sqs.Queue(this, "DeadLetterQueue", {
178
+ retentionPeriod: aws_cdk_lib_1.Duration.days(14),
179
+ });
180
+ // Main queue: visibility timeout is the lambda timeout plus 10 seconds; after 5 receives a message moves to the dead letter queue
181
+ this.queue = new aws_cdk_lib_1.aws_sqs.Queue(this, "Queue", {
182
+ visibilityTimeout: aws_cdk_lib_1.Duration.seconds(timeoutSeconds + 10),
183
+ encryption: aws_cdk_lib_1.aws_sqs.QueueEncryption.SQS_MANAGED,
184
+ deadLetterQueue: {
185
+ maxReceiveCount: 5,
186
+ queue: this.deadLetterQueue,
187
+ },
188
+ });
189
+ // SNS topic that serves as the entry point; its display name is prefixed with the construct id
190
+ this.topic = new aws_cdk_lib_1.aws_sns.Topic(this, "Topic", {
191
+ displayName: `${id}-StacItemLoaderTopic`,
192
+ });
193
+ // Subscribe the queue to the topic so published messages are buffered in SQS
194
+ this.topic.addSubscription(new aws_cdk_lib_1.aws_sns_subscriptions.SqsSubscription(this.queue));
195
+ // Create the lambda function; its code is built with Docker from the runtime Dockerfile, with PYTHON_VERSION taken from the runtime name and PGSTAC_VERSION from the database
196
+ this.lambdaFunction = new aws_cdk_lib_1.aws_lambda.Function(this, "Function", {
197
+ runtime: lambdaRuntime,
198
+ handler: "stac_item_loader.handler.handler",
199
+ vpc: props.vpc,
200
+ vpcSubnets: props.subnetSelection,
201
+ code: aws_cdk_lib_1.aws_lambda.Code.fromDockerBuild(path.join(__dirname, ".."), {
202
+ file: "stac-item-loader/runtime/Dockerfile",
203
+ platform: "linux/amd64",
204
+ buildArgs: {
205
+ PYTHON_VERSION: lambdaRuntime.toString().replace("python", ""),
206
+ PGSTAC_VERSION: props.pgstacDb.pgstacVersion,
207
+ },
208
+ }),
209
+ memorySize: props.memorySize ?? 1024,
210
+ timeout: aws_cdk_lib_1.Duration.seconds(timeoutSeconds),
211
+ reservedConcurrentExecutions: maxConcurrency,
212
+ logRetention: aws_cdk_lib_1.aws_logs.RetentionDays.ONE_WEEK,
213
+ environment: {
214
+ PGSTAC_SECRET_ARN: props.pgstacDb.pgstacSecret.secretArn,
215
+ ...props.environment,
216
+ },
217
+ // spread last so user-provided lambdaFunctionOptions override any of the defaults set above
218
+ ...props.lambdaFunctionOptions,
219
+ });
220
+ // Grant the lambda permission to read the database secret referenced by PGSTAC_SECRET_ARN
221
+ props.pgstacDb.pgstacSecret.grantRead(this.lambdaFunction);
222
+ // Add SQS event source to the lambda; its maxConcurrency matches the function's reserved concurrency and partial batch failures are reported
223
+ this.lambdaFunction.addEventSource(new aws_cdk_lib_1.aws_lambda_event_sources.SqsEventSource(this.queue, {
224
+ batchSize: props.batchSize ?? 500,
225
+ maxBatchingWindow: aws_cdk_lib_1.Duration.minutes(props.maxBatchingWindowMinutes ?? 1),
226
+ maxConcurrency: maxConcurrency,
227
+ reportBatchItemFailures: true,
228
+ }));
229
+ // Create outputs. NOTE(review): export names are fixed strings, so a second StacItemLoader in the same account/region would presumably collide - confirm
230
+ new aws_cdk_lib_1.CfnOutput(this, "TopicArn", {
231
+ value: this.topic.topicArn,
232
+ description: "ARN of the StacItemLoader SNS Topic",
233
+ exportName: "stac-item-loader-topic-arn",
234
+ });
235
+ new aws_cdk_lib_1.CfnOutput(this, "QueueUrl", {
236
+ value: this.queue.queueUrl,
237
+ description: "URL of the StacItemLoader SQS Queue",
238
+ exportName: "stac-item-loader-queue-url",
239
+ });
240
+ new aws_cdk_lib_1.CfnOutput(this, "DeadLetterQueueUrl", {
241
+ value: this.deadLetterQueue.queueUrl,
242
+ description: "URL of the StacItemLoader Dead Letter Queue",
243
+ exportName: "stac-item-loader-deadletter-queue-url",
244
+ });
245
+ new aws_cdk_lib_1.CfnOutput(this, "FunctionName", {
246
+ value: this.lambdaFunction.functionName,
247
+ description: "Name of the StacItemLoader Lambda Function",
248
+ exportName: "stac-item-loader-function-name",
249
+ });
250
+ }
251
+ }
252
+ exports.StacItemLoader = StacItemLoader;
253
+ _a = JSII_RTTI_SYMBOL_1;
254
+ StacItemLoader[_a] = { fqn: "eoapi-cdk.StacItemLoader", version: "8.2.1" };
255
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";;;;;AAAA,6CAUqB;AACrB,2CAAuC;AAEvC,6BAA6B;AAsI7B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgKG;AACH,MAAa,cAAe,SAAQ,sBAAS;IAoD3C,YAAY,KAAgB,EAAE,EAAU,EAAE,KAA0B;QAClE,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAEjB,MAAM,cAAc,GAAG,KAAK,CAAC,oBAAoB,IAAI,GAAG,CAAC;QACzD,MAAM,aAAa,GAAG,KAAK,CAAC,aAAa,IAAI,wBAAM,CAAC,OAAO,CAAC,WAAW,CAAC;QACxE,MAAM,cAAc,GAAG,KAAK,CAAC,cAAc,IAAI,CAAC,CAAC;QAEjD,2BAA2B;QAC3B,IAAI,CAAC,eAAe,GAAG,IAAI,qBAAG,CAAC,KAAK,CAAC,IAAI,EAAE,iBAAiB,EAAE;YAC5D,eAAe,EAAE,sBAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;SACnC,CAAC,CAAC;QAEH,oBAAoB;QACpB,IAAI,CAAC,KAAK,GAAG,IAAI,qBAAG,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE;YACxC,iBAAiB,EAAE,sBAAQ,CAAC,OAAO,CAAC,cAAc,GAAG,EAAE,CAAC;YACxD,UAAU,EAAE,qBAAG,CAAC,eAAe,CAAC,WAAW;YAC3C,eAAe,EAAE;gBACf,eAAe,EAAE,CAAC;gBAClB,KAAK,EAAE,IAAI,CAAC,eAAe;aAC5B;SACF,CAAC,CAAC;QAEH,mBAAmB;QACnB,IAAI,CAAC,KAAK,GAAG,IAAI,qBAAG,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE;YACxC,WAAW,EAAE,GAAG,EAAE,sBAAsB;SACzC,CAAC,CAAC;QAEH,mCAAmC;QACnC,IAAI,CAAC,KAAK,CAAC,eAAe,CACxB,IAAI,mCAAgB,CAAC,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,CACjD,CAAC;QAEF,6BAA6B;QAC7B,IAAI,CAAC,cAAc,GAAG,IAAI,wBAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE;YAC1D,OAAO,EAAE,aAAa;YACtB,OAAO,EAAE,kCAAkC;YAC3C,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,UAAU,EAAE,KAAK,CAAC,eAAe;YACjC,IAAI,EAAE,wBAAM,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE;gBAC5D,IAAI,EAAE,qCAAqC;gBAC3C,QAAQ,EAAE,aAAa;gBACvB,SAAS,EAAE;oBACT,cAAc,EAAE,aAAa,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;oBAC9D,cAAc,EAAE,KAAK,CAAC,QAAQ,CAAC,aAAa;iBAC7C;aACF,CAAC;YACF,UAAU,EAAE,KAAK,CAAC,UAAU,IAAI,IAAI;YACpC,OAAO,EAAE,sBAAQ,CAAC,OAAO,CAAC,cAAc,CAAC;YACzC,4BAA4B,EAAE,cAAc;YAC5C,YAAY,EAAE,sBAAI,CAAC,aAAa,CAAC,QAAQ;YACzC,WAAW,EAAE;gBACX,iBAAiB,EAAE,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC,SAA
S;gBACxD,GAAG,KAAK,CAAC,WAAW;aACrB;YACD,iEAAiE;YACjE,GAAG,KAAK,CAAC,qBAAqB;SAC/B,CAAC,CAAC;QAEH,gDAAgD;QAChD,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAE3D,qCAAqC;QACrC,IAAI,CAAC,cAAc,CAAC,cAAc,CAChC,IAAI,sCAAkB,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,EAAE;YAChD,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,GAAG;YACjC,iBAAiB,EAAE,sBAAQ,CAAC,OAAO,CACjC,KAAK,CAAC,wBAAwB,IAAI,CAAC,CACpC;YACD,cAAc,EAAE,cAAc;YAC9B,uBAAuB,EAAE,IAAI;SAC9B,CAAC,CACH,CAAC;QAEF,iBAAiB;QACjB,IAAI,uBAAS,CAAC,IAAI,EAAE,UAAU,EAAE;YAC9B,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ;YAC1B,WAAW,EAAE,qCAAqC;YAClD,UAAU,EAAE,4BAA4B;SACzC,CAAC,CAAC;QAEH,IAAI,uBAAS,CAAC,IAAI,EAAE,UAAU,EAAE;YAC9B,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ;YAC1B,WAAW,EAAE,qCAAqC;YAClD,UAAU,EAAE,4BAA4B;SACzC,CAAC,CAAC;QAEH,IAAI,uBAAS,CAAC,IAAI,EAAE,oBAAoB,EAAE;YACxC,KAAK,EAAE,IAAI,CAAC,eAAe,CAAC,QAAQ;YACpC,WAAW,EAAE,6CAA6C;YAC1D,UAAU,EAAE,uCAAuC;SACpD,CAAC,CAAC;QAEH,IAAI,uBAAS,CAAC,IAAI,EAAE,cAAc,EAAE;YAClC,KAAK,EAAE,IAAI,CAAC,cAAc,CAAC,YAAY;YACvC,WAAW,EAAE,4CAA4C;YACzD,UAAU,EAAE,gCAAgC;SAC7C,CAAC,CAAC;IACL,CAAC;;AArJH,wCAsJC","sourcesContent":["import {\n  aws_ec2 as ec2,\n  aws_lambda as lambda,\n  aws_sqs as sqs,\n  aws_sns as sns,\n  aws_sns_subscriptions as snsSubscriptions,\n  aws_lambda_event_sources as lambdaEventSources,\n  aws_logs as logs,\n  Duration,\n  CfnOutput,\n} from \"aws-cdk-lib\";\nimport { Construct } from \"constructs\";\nimport { PgStacDatabase } from \"../database\";\nimport * as path from \"path\";\nimport { CustomLambdaFunctionProps } from \"../utils\";\n\n/**\n * Configuration properties for the StacItemLoader construct.\n *\n * The StacItemLoader is part of a two-phase serverless STAC ingestion pipeline\n * that loads STAC items into a pgstac database. This construct creates\n * the infrastructure for receiving STAC items from multiple sources:\n * 1. SNS messages containing STAC metadata (direct ingestion)\n * 2. 
S3 event notifications for STAC items uploaded to S3 buckets\n *\n * Items from both sources are batched and inserted into PostgreSQL with the pgstac extension.\n *\n * @example\n * const loader = new StacItemLoader(this, 'ItemLoader', {\n *   pgstacDb: database,\n *   batchSize: 1000,\n *   maxBatchingWindowMinutes: 1,\n *   lambdaTimeoutSeconds: 300\n * });\n */\nexport interface StacItemLoaderProps {\n  /**\n   * The PgSTAC database instance to load items into.\n   *\n   * This database must have the pgstac extension installed and be properly\n   * configured with collections before items can be loaded. The loader will\n   * use AWS Secrets Manager to securely access database credentials.\n   */\n  readonly pgstacDb: PgStacDatabase;\n\n  /**\n   * VPC into which the lambda should be deployed.\n   */\n  readonly vpc?: ec2.IVpc;\n\n  /**\n   * Subnet into which the lambda should be deployed.\n   */\n  readonly subnetSelection?: ec2.SubnetSelection;\n\n  /**\n   * The lambda runtime to use for the item loading function.\n   *\n   * The function is implemented in Python and uses pypgstac for database\n   * operations. Ensure the runtime version is compatible with the pgstac\n   * version specified in the database configuration.\n   *\n   * @default lambda.Runtime.PYTHON_3_11\n   */\n  readonly lambdaRuntime?: lambda.Runtime;\n\n  /**\n   * The timeout for the item load lambda in seconds.\n   *\n   * This should accommodate the time needed to process up to `batchSize`\n   * items and perform database insertions. 
The SQS visibility timeout\n   * will be set to this value plus 10 seconds.\n   *\n   * @default 300\n   */\n  readonly lambdaTimeoutSeconds?: number;\n\n  /**\n   * Memory size for the lambda function in MB.\n   *\n   * Higher memory allocation may improve performance when processing\n   * large batches of STAC items, especially for memory-intensive\n   * database operations.\n   *\n   * @default 1024\n   */\n  readonly memorySize?: number;\n\n  /**\n   * SQS batch size for lambda event source.\n   *\n   * This determines the maximum number of STAC items that will be\n   * processed together in a single lambda invocation. Larger batch\n   * sizes improve database insertion efficiency but require more\n   * memory and longer processing time.\n   *\n   * **Batching Behavior**: SQS will wait to accumulate up to this many\n   * messages before triggering the Lambda, OR until the maxBatchingWindow\n   * timeout is reached, whichever comes first. This creates an efficient\n   * balance between throughput and latency.\n   *\n   * @default 500\n   */\n  readonly batchSize?: number;\n\n  /**\n   * Maximum batching window in minutes.\n   *\n   * Even if the batch size isn't reached, the lambda will be triggered\n   * after this time period to ensure timely processing of items.\n   * This prevents items from waiting indefinitely in low-volume scenarios.\n   *\n   * **Important**: This timeout works in conjunction with batchSize - SQS\n   * will trigger the Lambda when EITHER the batch size is reached OR this\n   * time window expires, ensuring items are processed in a timely manner\n   * regardless of volume.\n   *\n   * @default 1\n   */\n  readonly maxBatchingWindowMinutes?: number;\n\n  /**\n   * Maximum concurrent executions for the StacItemLoader Lambda function\n   *\n   * This limit will be applied to the Lambda function and will control how\n   * many concurrent batches will be released from the SQS queue.\n   *\n   * @default 2\n   */\n  readonly maxConcurrency?: 
number;\n\n  /**\n   * Additional environment variables for the lambda function.\n   *\n   * These will be merged with the default environment variables including\n   * PGSTAC_SECRET_ARN. Use this for custom configuration or debugging flags.\n   */\n  readonly environment?: { [key: string]: string };\n\n  /**\n   * Can be used to override the default lambda function properties.\n   *\n   * @default - defined in the construct.\n   */\n  readonly lambdaFunctionOptions?: CustomLambdaFunctionProps;\n}\n\n/**\n * AWS CDK Construct for STAC Item Loading Infrastructure\n *\n * The StacItemLoader creates a serverless, event-driven system for loading\n * STAC (SpatioTemporal Asset Catalog) items into a PostgreSQL database with\n * the pgstac extension. This construct supports multiple ingestion pathways\n * for flexible STAC item loading.\n *\n * ## Architecture Overview\n *\n * This construct creates the following AWS resources:\n * - **SNS Topic**: Entry point for STAC items and S3 event notifications\n * - **SQS Queue**: Buffers and batches messages before processing (60-second visibility timeout)\n * - **Dead Letter Queue**: Captures failed loading attempts after 5 retries\n * - **Lambda Function**: Python function that processes batches and inserts items into pgstac\n *\n * ## Data Flow\n *\n * The loader supports two primary data ingestion patterns:\n *\n * ### Direct STAC Item Publishing\n * 1. STAC items (JSON) are published directly to the SNS topic in message bodies\n * 2. The SQS queue collects messages and batches them (up to {batchSize} items or 1 minute window)\n * 3. The Lambda function receives batches, validates items, and inserts into pgstac\n *\n * ### S3 Event-Driven Loading\n * 1. An S3 bucket is configured to send notifications to the SNS topic when json files are created\n * 2. STAC items are uploaded to S3 buckets as JSON/GeoJSON files\n * 3. S3 event notifications are sent to the SNS topic when items are uploaded\n * 4. 
The Lambda function receives S3 events in the SQS message batch, fetches items from S3, and loads into pgstac\n *\n * ## Batching Behavior\n *\n * The SQS-to-Lambda integration uses intelligent batching to optimize performance:\n *\n * - **Batch Size**: Lambda waits to receive up to `batchSize` messages (default: 500)\n * - **Batching Window**: If fewer than `batchSize` messages are available, Lambda\n *   triggers after `maxBatchingWindow` minutes (default: 1 minute)\n * - **Trigger Condition**: Lambda executes when EITHER condition is met first\n * - **Concurrency**: Limited to `maxConcurrency` concurrent executions to prevent database overload\n * - **Partial Failures**: Uses `reportBatchItemFailures` to retry only failed items\n *\n * This approach balances throughput (larger batches = fewer database connections)\n * with latency (time-based triggers prevent indefinite waiting).\n *\n * ## Error Handling and Dead Letter Queue\n *\n * Failed messages are sent to the dead letter queue after 5 processing attempts.\n * **Important**: This construct provides NO automated handling of dead letter queue\n * messages - monitoring, inspection, and reprocessing of failed items is the\n * responsibility of the implementing application.\n *\n * Consider implementing:\n * - CloudWatch alarms on dead letter queue depth\n * - Manual or automated reprocessing workflows\n * - Logging and alerting for failed items\n * - Regular cleanup of old dead letter messages (14-day retention)\n *\n * ## Operational Characteristics\n *\n * - **Scalability**: Lambda scales automatically based on queue depth\n * - **Reliability**: Dead letter queue captures failures for debugging\n * - **Efficiency**: Batching optimizes database operations for high throughput\n * - **Security**: Database credentials accessed via AWS Secrets Manager\n * - **Observability**: CloudWatch logs retained for one week\n *\n * ## Prerequisites\n *\n * Before using this construct, ensure:\n * - The pgstac database has 
collections loaded (items require existing collection IDs)\n * - Database credentials are stored in AWS Secrets Manager\n * - The pgstac extension is properly installed and configured\n *\n * ## Usage Example\n *\n * ```typescript\n * // Create database first\n * const database = new PgStacDatabase(this, 'Database', {\n *   pgstacVersion: '0.9.5'\n * });\n *\n * // Create item loader\n * const loader = new StacItemLoader(this, 'ItemLoader', {\n *   pgstacDb: database,\n *   batchSize: 1000,          // Process up to 1000 items per batch\n *   maxBatchingWindowMinutes: 1, // Wait max 1 minute to fill batch\n *   lambdaTimeoutSeconds: 300     // Allow up to 300 seconds for database operations\n * });\n *\n * // The topic ARN can be used by other services to publish items\n * new CfnOutput(this, 'LoaderTopicArn', {\n *   value: loader.topic.topicArn\n * });\n * ```\n *\n * ## Direct Item Publishing\n *\n * External services can publish STAC items directly to the topic:\n *\n * ```bash\n * aws sns publish --topic-arn $ITEM_LOAD_TOPIC --message '{\n *   \"type\": \"Feature\",\n *   \"stac_version\": \"1.0.0\",\n *   \"id\": \"example-item\",\n *   \"properties\": {\"datetime\": \"2021-01-01T00:00:00Z\"},\n *   \"geometry\": {\"type\": \"Polygon\", \"coordinates\": [...]},\n *   \"collection\": \"example-collection\"\n * }'\n * ```\n *\n * ## S3 Event Configuration\n *\n * To enable S3 event-driven loading, configure S3 bucket notifications to send\n * events to the SNS topic when STAC items (.json or .geojson files) are uploaded:\n *\n * ```typescript\n * // Configure S3 bucket to send notifications to the loader topic\n * bucket.addEventNotification(\n *   s3.EventType.OBJECT_CREATED,\n *   new s3n.SnsDestination(loader.topic),\n *   { suffix: '.json' }\n * );\n *\n * bucket.addEventNotification(\n *   s3.EventType.OBJECT_CREATED,\n *   new s3n.SnsDestination(loader.topic),\n *   { suffix: '.geojson' }\n * );\n * ```\n *\n * When STAC items are uploaded to the 
configured S3 bucket, the loader will:\n * 1. Receive S3 event notifications via SNS\n * 2. Fetch the STAC item JSON from S3\n * 3. Validate and load the item into the pgstac database\n *\n * ## Monitoring and Troubleshooting\n *\n * - Monitor Lambda logs: `/aws/lambda/{FunctionName}`\n * - **Dead Letter Queue**: Check for failed items - **no automated handling provided**\n * - Use batch item failure reporting for partial batch processing\n * - CloudWatch metrics available for queue depth and Lambda performance\n *\n * ### Dead Letter Queue Management\n *\n * Applications must implement their own dead letter queue monitoring:\n *\n * ```typescript\n * // Example: CloudWatch alarm for dead letter queue depth\n * new cloudwatch.Alarm(this, 'DeadLetterAlarm', {\n *   metric: loader.deadLetterQueue.metricApproximateNumberOfVisibleMessages(),\n *   threshold: 1,\n *   evaluationPeriods: 1\n * });\n *\n * // Example: Lambda to reprocess dead letter messages\n * const reprocessFunction = new lambda.Function(this, 'Reprocess', {\n *   // Implementation to fetch and republish failed messages\n * });\n * ```\n *\n */\nexport class StacItemLoader extends Construct {\n  /**\n   * The SNS topic that receives STAC items and S3 event notifications for loading.\n   *\n   * This topic serves as the entry point for two types of events:\n   * 1. Direct STAC item JSON documents published by external services\n   * 2. 
S3 event notifications when STAC items are uploaded to configured buckets\n   *\n   * The topic fans out to the SQS queue for batched processing.\n   */\n  public readonly topic: sns.Topic;\n\n  /**\n   * The SQS queue that buffers messages before processing.\n   *\n   * This queue collects both direct STAC items from SNS and S3 event\n   * notifications, batching them for efficient database operations.\n   * Configured with a visibility timeout that accommodates Lambda\n   * processing time plus buffer.\n   */\n  public readonly queue: sqs.Queue;\n\n  /**\n   * Dead letter queue for failed item loading attempts.\n   *\n   * Messages that fail processing after 5 attempts are sent here\n   * for inspection and potential replay. Retains messages for 14 days\n   * to allow for debugging and manual intervention.\n   *\n   * **User Responsibility**: This construct provides NO automated monitoring,\n   * alerting, or reprocessing of dead letter queue messages. Applications\n   * using this construct must implement their own:\n   * - Dead letter queue depth monitoring and alerting\n   * - Failed message inspection and debugging workflows\n   * - Manual or automated reprocessing mechanisms\n   * - Cleanup procedures for old failed messages\n   */\n  public readonly deadLetterQueue: sqs.Queue;\n\n  /**\n   * The Lambda function that loads STAC items into the pgstac database.\n   *\n   * This Python function receives batches of messages from SQS and processes\n   * them based on their type:\n   * - Direct STAC items: Validates and loads directly into pgstac\n   * - S3 events: Fetches STAC items from S3, validates, and loads into pgstac\n   *\n   * The function connects to PostgreSQL using credentials from Secrets Manager\n   * and uses pypgstac for efficient database operations.\n   */\n  public readonly lambdaFunction: lambda.Function;\n\n  constructor(scope: Construct, id: string, props: StacItemLoaderProps) {\n    super(scope, id);\n\n    const timeoutSeconds = 
props.lambdaTimeoutSeconds ?? 300;\n    const lambdaRuntime = props.lambdaRuntime ?? lambda.Runtime.PYTHON_3_11;\n    const maxConcurrency = props.maxConcurrency ?? 2;\n\n    // Create dead letter queue\n    this.deadLetterQueue = new sqs.Queue(this, \"DeadLetterQueue\", {\n      retentionPeriod: Duration.days(14),\n    });\n\n    // Create main queue\n    this.queue = new sqs.Queue(this, \"Queue\", {\n      visibilityTimeout: Duration.seconds(timeoutSeconds + 10),\n      encryption: sqs.QueueEncryption.SQS_MANAGED,\n      deadLetterQueue: {\n        maxReceiveCount: 5,\n        queue: this.deadLetterQueue,\n      },\n    });\n\n    // Create SNS topic\n    this.topic = new sns.Topic(this, \"Topic\", {\n      displayName: `${id}-StacItemLoaderTopic`,\n    });\n\n    // Subscribe the queue to the topic\n    this.topic.addSubscription(\n      new snsSubscriptions.SqsSubscription(this.queue)\n    );\n\n    // Create the lambda function\n    this.lambdaFunction = new lambda.Function(this, \"Function\", {\n      runtime: lambdaRuntime,\n      handler: \"stac_item_loader.handler.handler\",\n      vpc: props.vpc,\n      vpcSubnets: props.subnetSelection,\n      code: lambda.Code.fromDockerBuild(path.join(__dirname, \"..\"), {\n        file: \"stac-item-loader/runtime/Dockerfile\",\n        platform: \"linux/amd64\",\n        buildArgs: {\n          PYTHON_VERSION: lambdaRuntime.toString().replace(\"python\", \"\"),\n          PGSTAC_VERSION: props.pgstacDb.pgstacVersion,\n        },\n      }),\n      memorySize: props.memorySize ?? 
1024,\n      timeout: Duration.seconds(timeoutSeconds),\n      reservedConcurrentExecutions: maxConcurrency,\n      logRetention: logs.RetentionDays.ONE_WEEK,\n      environment: {\n        PGSTAC_SECRET_ARN: props.pgstacDb.pgstacSecret.secretArn,\n        ...props.environment,\n      },\n      // overwrites defaults with user-provided configurable properties\n      ...props.lambdaFunctionOptions,\n    });\n\n    // Grant permissions to read the database secret\n    props.pgstacDb.pgstacSecret.grantRead(this.lambdaFunction);\n\n    // Add SQS event source to the lambda\n    this.lambdaFunction.addEventSource(\n      new lambdaEventSources.SqsEventSource(this.queue, {\n        batchSize: props.batchSize ?? 500,\n        maxBatchingWindow: Duration.minutes(\n          props.maxBatchingWindowMinutes ?? 1\n        ),\n        maxConcurrency: maxConcurrency,\n        reportBatchItemFailures: true,\n      })\n    );\n\n    // Create outputs\n    new CfnOutput(this, \"TopicArn\", {\n      value: this.topic.topicArn,\n      description: \"ARN of the StacItemLoader SNS Topic\",\n      exportName: \"stac-item-loader-topic-arn\",\n    });\n\n    new CfnOutput(this, \"QueueUrl\", {\n      value: this.queue.queueUrl,\n      description: \"URL of the StacItemLoader SQS Queue\",\n      exportName: \"stac-item-loader-queue-url\",\n    });\n\n    new CfnOutput(this, \"DeadLetterQueueUrl\", {\n      value: this.deadLetterQueue.queueUrl,\n      description: \"URL of the StacItemLoader Dead Letter Queue\",\n      exportName: \"stac-item-loader-deadletter-queue-url\",\n    });\n\n    new CfnOutput(this, \"FunctionName\", {\n      value: this.lambdaFunction.functionName,\n      description: \"Name of the StacItemLoader Lambda Function\",\n      exportName: \"stac-item-loader-function-name\",\n    });\n  }\n}\n"]}
@@ -0,0 +1,18 @@
1
+ ARG PYTHON_VERSION=3.11
2
+ FROM public.ecr.aws/lambda/python:${PYTHON_VERSION}
3
+ COPY --from=ghcr.io/astral-sh/uv:0.7.8 /uv /uvx /bin/
4
+ # uv (copied in above from its published image) drives the dependency install below; turn on its bytecode compilation and keep Python output unbuffered.
5
+ ENV UV_COMPILE_BYTECODE=1
6
+ ENV PYTHONUNBUFFERED=1
7
+ # Everything is staged under /asset - presumably the directory CDK's Code.fromDockerBuild copies out of the image; TODO confirm against its imagePath default.
8
+ WORKDIR /asset
9
+ # COPY sources are relative to the build context, which the StacItemLoader construct sets to the parent directory of stac-item-loader/.
10
+ COPY stac-item-loader/runtime/pyproject.toml pyproject.toml
11
+ COPY stac-item-loader/runtime/src/stac_item_loader/ stac_item_loader/
12
+ # PGSTAC_VERSION is supplied as a build arg by the construct (0.9.6 when omitted): pin pypgstac to it, export the resolved requirements, then install them into /asset.
13
+ ARG PGSTAC_VERSION=0.9.6
14
+ RUN uv add --no-sync pypgstac==${PGSTAC_VERSION} && \
15
+ uv export --no-dev --no-editable -o requirements.txt && \
16
+ uv pip install --target /asset -r requirements.txt
17
+ # Same handler path that the construct configures on the Lambda function.
18
+ CMD ["stac_item_loader.handler.handler"]