thinkwork-cli 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +2 -2
- package/dist/cli.js +1315 -330
- package/dist/terraform/examples/greenfield/main.tf +325 -19
- package/dist/terraform/examples/greenfield/terraform.tfvars.example +14 -0
- package/dist/terraform/modules/app/agentcore-code-interpreter/Dockerfile.sandbox-base +61 -0
- package/dist/terraform/modules/app/agentcore-code-interpreter/README.md +54 -0
- package/dist/terraform/modules/app/agentcore-code-interpreter/main.tf +197 -0
- package/dist/terraform/modules/app/agentcore-code-interpreter/scripts/build_and_push_sandbox_base.sh +70 -0
- package/dist/terraform/modules/app/agentcore-flue/README.md +58 -0
- package/dist/terraform/modules/app/agentcore-flue/main.tf +322 -0
- package/dist/terraform/modules/app/agentcore-flue/outputs.tf +23 -0
- package/dist/terraform/modules/app/agentcore-flue/variables.tf +91 -0
- package/dist/terraform/modules/app/agentcore-memory/scripts/create_or_find_memory.sh +0 -0
- package/dist/terraform/modules/app/agentcore-runtime/main.tf +165 -0
- package/dist/terraform/modules/app/appsync-subscriptions/main.tf +4 -0
- package/dist/terraform/modules/app/appsync-subscriptions/outputs.tf +5 -0
- package/dist/terraform/modules/app/computer-runtime/README.md +15 -0
- package/dist/terraform/modules/app/computer-runtime/main.tf +406 -0
- package/dist/terraform/modules/app/computer-runtime/outputs.tf +75 -0
- package/dist/terraform/modules/app/computer-runtime/variables.tf +66 -0
- package/dist/terraform/modules/app/hindsight-memory/main.tf +6 -0
- package/dist/terraform/modules/app/lambda-api/eval-fanout.tf +128 -0
- package/dist/terraform/modules/app/lambda-api/handlers.tf +1454 -43
- package/dist/terraform/modules/app/lambda-api/main.tf +221 -12
- package/dist/terraform/modules/app/lambda-api/mcp-oauth.tf +118 -0
- package/dist/terraform/modules/app/lambda-api/oauth-secrets.tf +49 -0
- package/dist/terraform/modules/app/lambda-api/outputs.tf +38 -0
- package/dist/terraform/modules/app/lambda-api/slack-app-secrets.tf +43 -0
- package/dist/terraform/modules/app/lambda-api/stripe-secrets.tf +53 -0
- package/dist/terraform/modules/app/lambda-api/variables.tf +349 -2
- package/dist/terraform/modules/app/lambda-api/workspace-events.tf +125 -0
- package/dist/terraform/modules/app/routines-stepfunctions/main.tf +453 -0
- package/dist/terraform/modules/app/sandbox-log-scrubber/README.md +66 -0
- package/dist/terraform/modules/app/sandbox-log-scrubber/main.tf +200 -0
- package/dist/terraform/modules/app/static-site/main.tf +146 -5
- package/dist/terraform/modules/app/www-dns/main.tf +118 -15
- package/dist/terraform/modules/app/www-dns/outputs.tf +10 -0
- package/dist/terraform/modules/app/www-dns/variables.tf +42 -0
- package/dist/terraform/modules/data/aurora-postgres/main.tf +164 -3
- package/dist/terraform/modules/data/aurora-postgres/outputs.tf +34 -0
- package/dist/terraform/modules/data/aurora-postgres/variables.tf +16 -0
- package/dist/terraform/modules/data/compliance-audit-bucket/README.md +145 -0
- package/dist/terraform/modules/data/compliance-audit-bucket/main.tf +573 -0
- package/dist/terraform/modules/data/compliance-audit-bucket/outputs.tf +43 -0
- package/dist/terraform/modules/data/compliance-audit-bucket/variables.tf +93 -0
- package/dist/terraform/modules/data/compliance-exports-bucket/main.tf +269 -0
- package/dist/terraform/modules/data/compliance-exports-bucket/outputs.tf +23 -0
- package/dist/terraform/modules/data/compliance-exports-bucket/variables.tf +50 -0
- package/dist/terraform/modules/data/s3-backups-bucket/main.tf +123 -0
- package/dist/terraform/modules/data/s3-buckets/main.tf +13 -0
- package/dist/terraform/modules/foundation/cognito/variables.tf +2 -2
- package/dist/terraform/modules/thinkwork/main.tf +439 -21
- package/dist/terraform/modules/thinkwork/outputs.tf +121 -0
- package/dist/terraform/modules/thinkwork/variables.tf +153 -2
- package/dist/terraform/schema.graphql +17 -0
- package/package.json +15 -14
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# ThinkWork Computer Runtime — shared ECS/EFS substrate
|
|
3
|
+
#
|
|
4
|
+
# This module intentionally creates shared substrate only. Per-Computer EFS
|
|
5
|
+
# access points, task-definition revisions, and ECS services are reconciled by
|
|
6
|
+
# the Computer manager Lambda from database rows.
|
|
7
|
+
################################################################################
|
|
8
|
+
|
|
9
|
+
locals {
  # String form ("ENABLED"/"DISABLED") of the boolean assign_public_ip input.
  assign_public_ip = var.assign_public_ip ? "ENABLED" : "DISABLED"

  # Tasks use the shared subnet list unless a dedicated task subnet list is set.
  task_subnet_ids = length(var.task_subnet_ids) == 0 ? var.subnet_ids : var.task_subnet_ids
}
|
|
13
|
+
|
|
14
|
+
# ECR repository for the Computer runtime image; images are scanned on push.
resource "aws_ecr_repository" "runtime" {
  name = "thinkwork-${var.stage}-computer-runtime"

  # Tags are mutable, so a later push may replace the image behind a tag.
  image_tag_mutability = "MUTABLE"

  image_scanning_configuration {
    scan_on_push = true
  }

  tags = {
    Name = "thinkwork-${var.stage}-computer-runtime"
  }
}
|
|
24
|
+
|
|
25
|
+
# Log group exported as the module's log_group_name output.
resource "aws_cloudwatch_log_group" "runtime" {
  name              = "/thinkwork/${var.stage}/computer-runtime"
  retention_in_days = var.log_retention_days

  tags = {
    Name = "thinkwork-${var.stage}-computer-runtime-logs"
  }
}
|
|
31
|
+
|
|
32
|
+
# Log group referenced by the cluster's execute_command_configuration as the
# ECS Exec logging destination.
resource "aws_cloudwatch_log_group" "ecs_exec" {
  name              = "/thinkwork/${var.stage}/computer-ecs-exec"
  retention_in_days = var.log_retention_days

  tags = {
    Name = "thinkwork-${var.stage}-computer-ecs-exec-logs"
  }
}
|
|
38
|
+
|
|
39
|
+
# Shared ECS cluster for Computer runtime services.
resource "aws_ecs_cluster" "runtime" {
  name = "thinkwork-${var.stage}-computer"

  tags = {
    Name = "thinkwork-${var.stage}-computer-cluster"
  }

  setting {
    name  = "containerInsights"
    value = "enabled"
  }

  # ECS Exec audit-log destination. Per-session command streams land in the
  # ecs_exec log group once enable_execute_command=true is set on the
  # per-Computer service AND the task role has
  # ssmmessages:Create/OpenControl+DataChannel. CloudTrail captures the
  # ExecuteCommand API call separately. Plan:
  # docs/plans/2026-05-13-004-feat-computer-terminal-ecs-exec-plan.md.
  configuration {
    execute_command_configuration {
      # OVERRIDE: use the explicit log_configuration below.
      logging = "OVERRIDE"

      log_configuration {
        cloud_watch_log_group_name = aws_cloudwatch_log_group.ecs_exec.name
      }
    }
  }
}
|
|
63
|
+
|
|
64
|
+
# Encrypted EFS file system shared by all Computer workspaces.
resource "aws_efs_file_system" "workspace" {
  encrypted      = true
  creation_token = "thinkwork-${var.stage}-computer-workspaces"

  tags = {
    Name = "thinkwork-${var.stage}-computer-workspaces"
  }
}
|
|
70
|
+
|
|
71
|
+
# Security group for Computer runtime tasks. It declares no ingress and allows
# all egress; the EFS and VPC-endpoint security groups reference it as a source.
resource "aws_security_group" "task" {
  vpc_id      = var.vpc_id
  name_prefix = "thinkwork-${var.stage}-computer-task-"
  description = "ThinkWork Computer runtime task egress and EFS client access"

  # Unrestricted outbound: every protocol, every port, any IPv4 destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "thinkwork-${var.stage}-computer-task-sg"
  }

  lifecycle {
    create_before_destroy = true
  }
}
|
|
86
|
+
|
|
87
|
+
# Security group attached to the workspace EFS mount targets.
#
# Ingress is declared only through standalone aws_security_group_rule
# resources. This module also adds NFS access for the workspace-files-efs
# Lambda with a standalone rule on this group, and the AWS provider warns that
# mixing inline `ingress` blocks with aws_security_group_rule on the same group
# causes rule conflicts, perpetual diffs, and overwritten rules.
#
# NOTE(review): stacks that already applied the previous inline form may need
# the existing task rule imported into aws_security_group_rule.efs_from_task
# before the next apply — confirm against the target state before rollout.
resource "aws_security_group" "efs" {
  name_prefix = "thinkwork-${var.stage}-computer-efs-"
  description = "ThinkWork Computer workspace EFS"
  vpc_id      = var.vpc_id

  tags = { Name = "thinkwork-${var.stage}-computer-efs-sg" }
  lifecycle { create_before_destroy = true }
}

# NFS (TCP 2049) from Computer runtime tasks to the workspace EFS.
resource "aws_security_group_rule" "efs_from_task" {
  description              = "NFS from Computer runtime tasks"
  type                     = "ingress"
  from_port                = 2049
  to_port                  = 2049
  protocol                 = "tcp"
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.task.id
}
|
|
103
|
+
|
|
104
|
+
# Security group for the interface VPC endpoints; only HTTPS from the task
# security group is admitted.
resource "aws_security_group" "vpc_endpoints" {
  vpc_id      = var.vpc_id
  name_prefix = "thinkwork-${var.stage}-computer-vpce-"
  description = "PrivateLink endpoints for Computer runtime image pulls and logs"

  ingress {
    description     = "HTTPS from Computer runtime tasks"
    protocol        = "tcp"
    from_port       = 443
    to_port         = 443
    security_groups = [aws_security_group.task.id]
  }

  tags = {
    Name = "thinkwork-${var.stage}-computer-vpce-sg"
  }

  lifecycle {
    create_before_destroy = true
  }
}
|
|
120
|
+
|
|
121
|
+
# Looks up one route table per subnet in var.subnet_ids. The resulting IDs
# feed route_table_ids on the S3 gateway endpoint.
data "aws_route_table" "computer_subnet" {
  for_each = toset(var.subnet_ids)

  # For a set, each.key and each.value are the same subnet ID.
  subnet_id = each.key
}
|
|
125
|
+
|
|
126
|
+
# One interface (PrivateLink) endpoint per listed AWS service, placed in
# var.subnet_ids with private DNS enabled.
#
# NOTE(review): ssmmessages (used by ECS Exec) and Secrets Manager are not in
# this list — presumably that traffic leaves through NAT or a public IP; confirm.
resource "aws_vpc_endpoint" "interface" {
  for_each = toset(["bedrock-runtime", "ecr.api", "ecr.dkr", "logs"])

  vpc_endpoint_type   = "Interface"
  vpc_id              = var.vpc_id
  subnet_ids          = var.subnet_ids
  security_group_ids  = [aws_security_group.vpc_endpoints.id]
  service_name        = "com.amazonaws.${var.region}.${each.key}"
  private_dns_enabled = true

  tags = {
    # Dots in the service name (e.g. "ecr.api") become dashes in the Name tag.
    Name = "thinkwork-${var.stage}-computer-${replace(each.key, ".", "-")}-vpce"
  }
}
|
|
143
|
+
|
|
144
|
+
# S3 gateway endpoint attached to the de-duplicated set of route tables found
# for var.subnet_ids.
resource "aws_vpc_endpoint" "s3" {
  vpc_endpoint_type = "Gateway"
  vpc_id            = var.vpc_id
  service_name      = "com.amazonaws.${var.region}.s3"

  # Several subnets can share a route table, hence distinct().
  route_table_ids = distinct([
    for route_table in data.aws_route_table.computer_subnet : route_table.id
  ])

  tags = {
    Name = "thinkwork-${var.stage}-computer-s3-vpce"
  }
}
|
|
152
|
+
|
|
153
|
+
# One EFS mount target per subnet in var.subnet_ids, guarded by the EFS
# security group.
resource "aws_efs_mount_target" "workspace" {
  for_each = toset(var.subnet_ids)

  subnet_id       = each.key
  file_system_id  = aws_efs_file_system.workspace.id
  security_groups = [aws_security_group.efs.id]
}
|
|
160
|
+
|
|
161
|
+
# Shared admin access point for the `workspace-files-efs` Lambda, so it can
# read any Computer's workspace directly instead of going through the
# per-Computer runtime task queue.
#
# Per-Computer access points (created lazily by provisionComputerRuntime)
# chroot the runtime to a single /tenants/<tenantId>/computers/<computerId>
# subpath. This one is rooted higher, at /tenants, so a single Lambda can
# resolve any (tenantId, computerId) at request time.
#
# uid/gid 1000 matches the PosixUser used for the per-Computer access points
# (see packages/api/src/lib/computers/runtime-control.ts:createAccessPoint),
# so the Lambda reads files the runtime wrote with consistent ownership.
resource "aws_efs_access_point" "workspace_admin" {
  file_system_id = aws_efs_file_system.workspace.id

  root_directory {
    path = "/tenants"

    # Ownership and mode applied if /tenants has to be created.
    creation_info {
      owner_uid   = 1000
      owner_gid   = 1000
      permissions = "750"
    }
  }

  posix_user {
    uid = 1000
    gid = 1000
  }

  tags = {
    Name = "thinkwork-${var.stage}-computer-workspaces-admin"
  }
}
|
|
190
|
+
|
|
191
|
+
# Security group for the Lambda that mounts the shared workspace EFS. It is
# kept separate from the per-Computer task SG so Lambda traffic is auditable
# separately from task traffic. It declares no ingress and allows all egress.
resource "aws_security_group" "workspace_admin_lambda" {
  vpc_id      = var.vpc_id
  name_prefix = "thinkwork-${var.stage}-workspace-admin-lambda-"
  description = "ThinkWork workspace-files-efs Lambda - EFS client"

  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "thinkwork-${var.stage}-workspace-admin-lambda-sg"
  }

  lifecycle {
    create_before_destroy = true
  }
}
|
|
209
|
+
|
|
210
|
+
# Allows NFS (TCP 2049) into the EFS security group from the
# workspace-admin Lambda security group.
resource "aws_security_group_rule" "efs_from_workspace_admin_lambda" {
  type        = "ingress"
  description = "NFS from workspace-files-efs Lambda"

  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049

  source_security_group_id = aws_security_group.workspace_admin_lambda.id
  security_group_id        = aws_security_group.efs.id
}
|
|
219
|
+
|
|
220
|
+
# Execution role, assumable by ECS tasks. The AWS-managed
# AmazonECSTaskExecutionRolePolicy is attached to it separately.
resource "aws_iam_role" "execution" {
  name = "thinkwork-${var.stage}-computer-execution"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "ecs-tasks.amazonaws.com" }
      },
    ]
  })
}
|
|
232
|
+
|
|
233
|
+
# Attaches the AWS-managed ECS task execution policy to the execution role.
resource "aws_iam_role_policy_attachment" "execution" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.execution.name
}
|
|
237
|
+
|
|
238
|
+
# Task role, assumable by ECS tasks. The task_* inline policies in this module
# attach to it.
resource "aws_iam_role" "task" {
  name = "thinkwork-${var.stage}-computer-task"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "ecs-tasks.amazonaws.com" }
      },
    ]
  })
}
|
|
250
|
+
|
|
251
|
+
# Lets the task role read the API auth secret. Created only when
# var.api_auth_secret_arn is non-empty.
#
# NOTE(review): this grants the *task* role. If the secret is injected through
# the task definition's `secrets` field rather than fetched by the runtime
# itself, the *execution* role would need this permission instead — confirm
# against the manager's task-definition code.
resource "aws_iam_role_policy" "task_secrets" {
  count = var.api_auth_secret_arn == "" ? 0 : 1

  name = "computer-runtime-secret-read"
  role = aws_iam_role.task.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = ["secretsmanager:GetSecretValue"]
        Resource = var.api_auth_secret_arn
      },
    ]
  })
}
|
|
265
|
+
|
|
266
|
+
# Lets the task role invoke Bedrock models (plain and streaming) against
# foundation models in any region and this account's inference profiles.
resource "aws_iam_role_policy" "task_bedrock" {
  name = "computer-runtime-bedrock"
  role = aws_iam_role.task.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "bedrock:InvokeModel",
          "bedrock:InvokeModelWithResponseStream",
        ]
        Resource = [
          # Foundation-model ARNs carry no account ID.
          "arn:aws:bedrock:*::foundation-model/*",
          "arn:aws:bedrock:*:${var.account_id}:inference-profile/*",
        ]
      },
    ]
  })
}
|
|
285
|
+
|
|
286
|
+
# Lets the task role invoke AgentCore runtimes in this account and region,
# and read the SSM parameter at .../agentcore/runtime-id-strands.
resource "aws_iam_role_policy" "task_agentcore" {
  name = "computer-runtime-agentcore"
  role = aws_iam_role.task.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = ["bedrock-agentcore:InvokeAgentRuntime"]
        Resource = "arn:aws:bedrock-agentcore:${var.region}:${var.account_id}:runtime/*"
      },
      {
        Effect   = "Allow"
        Action   = ["ssm:GetParameter"]
        Resource = "arn:aws:ssm:${var.region}:${var.account_id}:parameter/thinkwork/${var.stage}/agentcore/runtime-id-strands"
      },
    ]
  })
}
|
|
306
|
+
|
|
307
|
+
# Lets the task role call only the publishComputerThreadChunk mutation on the
# given AppSync API. Created only when var.appsync_api_arn is non-empty.
resource "aws_iam_role_policy" "task_appsync" {
  count = var.appsync_api_arn == "" ? 0 : 1

  name = "computer-runtime-appsync"
  role = aws_iam_role.task.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = ["appsync:GraphQL"]
        Resource = "${var.appsync_api_arn}/types/Mutation/fields/publishComputerThreadChunk"
      },
    ]
  })
}
|
|
321
|
+
|
|
322
|
+
# ECS Exec — lets the in-task SSM agent open a control + data channel back
# to ssmmessages. Required for the admin Terminal tab (browser MGS
# WebSocket terminates at ssmmessages, which fans out to the agent in the
# task via these channels). Plus the CloudWatch Logs permissions that let the
# per-session command transcript land in the cluster's exec log group.
#
# The AWS ECS Exec documentation lists logs:DescribeLogGroups (on "*") in
# addition to the stream-level actions when CloudWatch logging is configured,
# so it is granted here in its own statement.
resource "aws_iam_role_policy" "task_ssm_messages" {
  name = "computer-runtime-ssm-messages"
  role = aws_iam_role.task.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "ssmmessages:CreateControlChannel",
          "ssmmessages:CreateDataChannel",
          "ssmmessages:OpenControlChannel",
          "ssmmessages:OpenDataChannel",
        ]
        Resource = "*"
      },
      {
        # DescribeLogGroups is not scoped to a single log group.
        Effect   = "Allow"
        Action   = ["logs:DescribeLogGroups"]
        Resource = "*"
      },
      {
        Effect = "Allow"
        Action = [
          "logs:CreateLogStream",
          "logs:DescribeLogStreams",
          "logs:PutLogEvents",
        ]
        Resource = "${aws_cloudwatch_log_group.ecs_exec.arn}:*"
      },
    ]
  })
}
|
|
356
|
+
|
|
357
|
+
# Managed policy for the Computer manager Lambda, exported as
# manager_policy_arn. It has three statements: EFS access-point management,
# ECS service and task-definition management (plus ECS Exec), and PassRole
# limited to this module's execution and task roles.
resource "aws_iam_policy" "manager" {
  name        = "thinkwork-${var.stage}-computer-manager"
  description = "Allow the Computer manager Lambda to reconcile per-Computer ECS/EFS resources"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # EFS access points on the workspace file system.
      {
        Effect = "Allow"
        Action = [
          "elasticfilesystem:CreateAccessPoint",
          "elasticfilesystem:DescribeAccessPoints",
          "elasticfilesystem:DeleteAccessPoint",
          "elasticfilesystem:TagResource",
        ]
        Resource = [
          aws_efs_file_system.workspace.arn,
          "arn:aws:elasticfilesystem:${var.region}:${var.account_id}:access-point/*",
        ]
      },

      # ECS services, tasks, and task definitions.
      {
        Effect = "Allow"
        Action = [
          "ecs:CreateService",
          "ecs:UpdateService",
          "ecs:DeleteService",
          "ecs:DescribeServices",
          "ecs:DescribeTasks",
          "ecs:ListTasks",
          "ecs:RegisterTaskDefinition",
          "ecs:DeregisterTaskDefinition",
          "ecs:DescribeTaskDefinition",
          # Admin Terminal tab — computer-terminal-start opens an
          # interactive shell via ECS Exec. AWS scopes ExecuteCommand
          # by task ARN; we leave Resource="*" because tasks are
          # ephemeral and their ARNs are not known at policy-apply
          # time. The Lambda's per-request authz (tenant-admin + the
          # Computer's tenant_id match) is the actual gate.
          "ecs:ExecuteCommand",
        ]
        Resource = "*"
      },

      # PassRole only for the two roles this module creates.
      {
        Effect   = "Allow"
        Action   = ["iam:PassRole"]
        Resource = [aws_iam_role.execution.arn, aws_iam_role.task.arn]
      },
    ]
  })
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# --- ECS cluster -------------------------------------------------------------

output "cluster_name" {
  value = aws_ecs_cluster.runtime.name
}

output "cluster_arn" {
  value = aws_ecs_cluster.runtime.arn
}

# --- Workspace EFS -----------------------------------------------------------

output "efs_file_system_id" {
  value = aws_efs_file_system.workspace.id
}

output "efs_file_system_arn" {
  value = aws_efs_file_system.workspace.arn
}

# --- Security groups ---------------------------------------------------------

output "task_security_group_id" {
  value = aws_security_group.task.id
}

output "efs_security_group_id" {
  value = aws_security_group.efs.id
}

# --- Admin access point and its Lambda security group ------------------------

output "workspace_admin_access_point_arn" {
  value = aws_efs_access_point.workspace_admin.arn
}

output "workspace_admin_access_point_id" {
  value = aws_efs_access_point.workspace_admin.id
}

output "workspace_admin_lambda_sg_id" {
  value = aws_security_group.workspace_admin_lambda.id
}

# --- Networking --------------------------------------------------------------

# The subnet_ids input, passed through unchanged.
output "subnet_ids" {
  value = var.subnet_ids
}

# Resolved task subnets (falls back to subnet_ids when none are given).
output "task_subnet_ids" {
  value = local.task_subnet_ids
}

# "ENABLED" or "DISABLED".
output "assign_public_ip" {
  value = local.assign_public_ip
}

# --- IAM roles ---------------------------------------------------------------

output "execution_role_arn" {
  value = aws_iam_role.execution.arn
}

output "task_role_arn" {
  value = aws_iam_role.task.arn
}

# --- Logging and image -------------------------------------------------------

output "log_group_name" {
  value = aws_cloudwatch_log_group.runtime.name
}

output "repository_url" {
  value = aws_ecr_repository.runtime.repository_url
}

# --- Task sizing defaults (inputs passed through) ----------------------------

output "default_cpu" {
  value = var.default_cpu
}

output "default_memory" {
  value = var.default_memory
}

# --- Manager policy ----------------------------------------------------------

output "manager_policy_arn" {
  value = aws_iam_policy.manager.arn
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# --- Required inputs ---------------------------------------------------------

variable "stage" {
  type        = string
  description = "Deployment stage"
}

variable "account_id" {
  type        = string
  description = "AWS account ID"
}

variable "region" {
  type        = string
  description = "AWS region"
}

variable "vpc_id" {
  type        = string
  description = "VPC ID for Computer runtime tasks and EFS"
}

variable "subnet_ids" {
  type        = list(string)
  description = "Private subnet IDs for Computer runtime EFS mount targets and VPC endpoints"
}

# --- Optional networking -----------------------------------------------------

variable "task_subnet_ids" {
  type        = list(string)
  description = "Subnet IDs for Computer runtime ECS tasks. Defaults to subnet_ids."
  default     = []
}

variable "assign_public_ip" {
  type        = bool
  description = "Whether Computer runtime ECS tasks receive a public IP for direct outbound internet access."
  default     = false
}

# --- Optional integrations (an empty string disables the matching policy) ----

variable "api_auth_secret_arn" {
  type        = string
  description = "Optional Secrets Manager ARN for THINKWORK_API_SECRET injection. Empty keeps runtime secret wiring deferred to the manager."
  default     = ""
}

variable "appsync_api_arn" {
  type        = string
  description = "Optional AppSync API ARN for Computer runtime streaming publishes."
  default     = ""
}

# --- Task sizing and logging -------------------------------------------------

variable "default_cpu" {
  type        = number
  description = "Default Fargate CPU units for one Computer runtime task"
  default     = 256
}

variable "default_memory" {
  type        = number
  description = "Default Fargate memory MB for one Computer runtime task"
  default     = 512
}

variable "log_retention_days" {
  type        = number
  description = "CloudWatch log retention for Computer runtime tasks"
  default     = 7
}
|
|
@@ -267,6 +267,12 @@ resource "aws_ecs_task_definition" "hindsight" {
|
|
|
267
267
|
{ name = "HINDSIGHT_API_EMBEDDINGS_PROVIDER", value = "local" },
|
|
268
268
|
{ name = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL", value = "BAAI/bge-small-en-v1.5" },
|
|
269
269
|
{ name = "HINDSIGHT_API_RERANKER_PROVIDER", value = "local" },
|
|
270
|
+
{ name = "HINDSIGHT_API_RERANKER_MAX_CANDIDATES", value = "20" },
|
|
271
|
+
{ name = "HINDSIGHT_API_RERANKER_LOCAL_BUCKET_BATCHING", value = "true" },
|
|
272
|
+
{ name = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT", value = "1" },
|
|
273
|
+
{ name = "HINDSIGHT_API_RECALL_BUDGET_FUNCTION", value = "adaptive" },
|
|
274
|
+
{ name = "HINDSIGHT_API_RECALL_BUDGET_MIN", value = "5" },
|
|
275
|
+
{ name = "HINDSIGHT_API_RECALL_BUDGET_MAX", value = "300" },
|
|
270
276
|
]
|
|
271
277
|
|
|
272
278
|
logConfiguration = {
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------
|
|
2
|
+
# Evals per-case fan-out substrate
|
|
3
|
+
#
|
|
4
|
+
# eval-runner dispatches one message per test case. eval-worker catches
|
|
5
|
+
# application-level case failures and writes eval_results.status='error';
|
|
6
|
+
# infrastructure failures redrive through SQS to the DLQ after maxReceiveCount=5.
|
|
7
|
+
# ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
# FIFO dead-letter queue for the eval fan-out queue. Created only when
# local.use_local_zips is true.
resource "aws_sqs_queue" "eval_fanout_dlq" {
  count = local.use_local_zips ? 1 : 0

  name       = "thinkwork-${var.stage}-eval-fanout-dlq.fifo"
  fifo_queue = true

  sqs_managed_sse_enabled   = true
  message_retention_seconds = 1209600 # 14 days

  tags = { Name = "thinkwork-${var.stage}-eval-fanout-dlq.fifo" }
}
|
|
20
|
+
|
|
21
|
+
# FIFO work queue for eval fan-out. Messages that have been received 5 times
# are moved to the DLQ by the redrive policy. Created only when
# local.use_local_zips is true.
resource "aws_sqs_queue" "eval_fanout" {
  count = local.use_local_zips ? 1 : 0

  name                        = "thinkwork-${var.stage}-eval-fanout.fifo"
  fifo_queue                  = true
  content_based_deduplication = true

  sqs_managed_sse_enabled    = true
  visibility_timeout_seconds = 300
  message_retention_seconds  = 86400 # 1 day; DLQ holds longer-stuck messages

  redrive_policy = jsonencode({
    maxReceiveCount     = 5
    deadLetterTargetArn = aws_sqs_queue.eval_fanout_dlq[0].arn
  })

  tags = { Name = "thinkwork-${var.stage}-eval-fanout.fifo" }
}
|
|
39
|
+
|
|
40
|
+
# Grants the shared Lambda role permission to send (single and batch) to the
# eval fan-out queue.
resource "aws_iam_role_policy" "eval_fanout_send" {
  count = local.use_local_zips ? 1 : 0

  name = "eval-fanout-send"
  role = aws_iam_role.lambda.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid      = "EvalRunnerSendFanoutMessages"
        Effect   = "Allow"
        Action   = ["sqs:SendMessage", "sqs:SendMessageBatch"]
        Resource = aws_sqs_queue.eval_fanout[0].arn
      },
    ]
  })
}
|
|
58
|
+
|
|
59
|
+
# Grants the shared Lambda role the consumer-side SQS actions on the fan-out
# queue, plus SendMessage on the DLQ.
resource "aws_iam_role_policy" "eval_worker_sqs" {
  count = local.use_local_zips ? 1 : 0

  name = "eval-worker-sqs"
  role = aws_iam_role.lambda.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # Consume from the main queue.
      {
        Sid    = "EvalWorkerReceiveFanoutMessages"
        Effect = "Allow"
        Action = [
          "sqs:ReceiveMessage",
          "sqs:DeleteMessage",
          "sqs:GetQueueAttributes",
          "sqs:ChangeMessageVisibility",
        ]
        Resource = aws_sqs_queue.eval_fanout[0].arn
      },
      # Write to the dead-letter queue.
      {
        Sid      = "EvalWorkerSendDlqMessages"
        Effect   = "Allow"
        Action   = ["sqs:SendMessage"]
        Resource = aws_sqs_queue.eval_fanout_dlq[0].arn
      },
    ]
  })
}
|
|
87
|
+
|
|
88
|
+
# Connects the fan-out queue to the eval-worker Lambda: one message per
# invocation, partial-batch failure reporting, at most 20 concurrent invokes.
resource "aws_lambda_event_source_mapping" "eval_fanout" {
  count = local.use_local_zips ? 1 : 0

  enabled          = true
  event_source_arn = aws_sqs_queue.eval_fanout[0].arn
  function_name    = aws_lambda_function.handler["eval-worker"].function_name

  batch_size              = 1
  function_response_types = ["ReportBatchItemFailures"]

  scaling_config {
    maximum_concurrency = 20
  }
}
|
|
101
|
+
|
|
102
|
+
# Event-invoke settings for eval-worker: no retries, and events older than
# one hour are dropped.
resource "aws_lambda_function_event_invoke_config" "eval_worker" {
  count = local.use_local_zips ? 1 : 0

  function_name = aws_lambda_function.handler["eval-worker"].function_name

  maximum_retry_attempts       = 0
  maximum_event_age_in_seconds = 3600
}
|
|
109
|
+
|
|
110
|
+
# Alarms as soon as at least one message is visible in the eval fan-out DLQ
# (one 60-second period). No alarm actions are wired here.
resource "aws_cloudwatch_metric_alarm" "eval_fanout_dlq_depth" {
  count = local.use_local_zips ? 1 : 0

  alarm_name        = "thinkwork-${var.stage}-eval-fanout-dlq-depth"
  alarm_description = "Eval fan-out DLQ has messages — eval-worker crashed before recording a case result; operator must inspect."
  alarm_actions     = []

  # Metric: visible message count on the DLQ.
  namespace   = "AWS/SQS"
  metric_name = "ApproximateNumberOfMessagesVisible"
  dimensions = {
    QueueName = aws_sqs_queue.eval_fanout_dlq[0].name
  }

  # Evaluation: Maximum over one 60s period, breaching at >= 1.
  statistic           = "Maximum"
  period              = 60
  evaluation_periods  = 1
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1

  # Missing datapoints are treated as healthy.
  treat_missing_data = "notBreaching"
}
|