thinkwork-cli 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +2 -2
  3. package/dist/cli.js +1187 -315
  4. package/dist/terraform/examples/greenfield/main.tf +325 -19
  5. package/dist/terraform/examples/greenfield/terraform.tfvars.example +14 -0
  6. package/dist/terraform/modules/app/agentcore-code-interpreter/Dockerfile.sandbox-base +61 -0
  7. package/dist/terraform/modules/app/agentcore-code-interpreter/README.md +54 -0
  8. package/dist/terraform/modules/app/agentcore-code-interpreter/main.tf +197 -0
  9. package/dist/terraform/modules/app/agentcore-code-interpreter/scripts/build_and_push_sandbox_base.sh +70 -0
  10. package/dist/terraform/modules/app/agentcore-flue/README.md +58 -0
  11. package/dist/terraform/modules/app/agentcore-flue/main.tf +322 -0
  12. package/dist/terraform/modules/app/agentcore-flue/outputs.tf +23 -0
  13. package/dist/terraform/modules/app/agentcore-flue/variables.tf +91 -0
  14. package/dist/terraform/modules/app/agentcore-memory/scripts/create_or_find_memory.sh +0 -0
  15. package/dist/terraform/modules/app/agentcore-runtime/main.tf +165 -0
  16. package/dist/terraform/modules/app/appsync-subscriptions/main.tf +4 -0
  17. package/dist/terraform/modules/app/appsync-subscriptions/outputs.tf +5 -0
  18. package/dist/terraform/modules/app/computer-runtime/README.md +15 -0
  19. package/dist/terraform/modules/app/computer-runtime/main.tf +406 -0
  20. package/dist/terraform/modules/app/computer-runtime/outputs.tf +75 -0
  21. package/dist/terraform/modules/app/computer-runtime/variables.tf +66 -0
  22. package/dist/terraform/modules/app/hindsight-memory/main.tf +6 -0
  23. package/dist/terraform/modules/app/lambda-api/eval-fanout.tf +128 -0
  24. package/dist/terraform/modules/app/lambda-api/handlers.tf +1454 -43
  25. package/dist/terraform/modules/app/lambda-api/main.tf +221 -12
  26. package/dist/terraform/modules/app/lambda-api/mcp-oauth.tf +118 -0
  27. package/dist/terraform/modules/app/lambda-api/oauth-secrets.tf +49 -0
  28. package/dist/terraform/modules/app/lambda-api/outputs.tf +38 -0
  29. package/dist/terraform/modules/app/lambda-api/slack-app-secrets.tf +43 -0
  30. package/dist/terraform/modules/app/lambda-api/stripe-secrets.tf +53 -0
  31. package/dist/terraform/modules/app/lambda-api/variables.tf +349 -2
  32. package/dist/terraform/modules/app/lambda-api/workspace-events.tf +125 -0
  33. package/dist/terraform/modules/app/routines-stepfunctions/main.tf +453 -0
  34. package/dist/terraform/modules/app/sandbox-log-scrubber/README.md +66 -0
  35. package/dist/terraform/modules/app/sandbox-log-scrubber/main.tf +200 -0
  36. package/dist/terraform/modules/app/static-site/main.tf +146 -5
  37. package/dist/terraform/modules/app/www-dns/main.tf +118 -15
  38. package/dist/terraform/modules/app/www-dns/outputs.tf +10 -0
  39. package/dist/terraform/modules/app/www-dns/variables.tf +42 -0
  40. package/dist/terraform/modules/data/aurora-postgres/main.tf +164 -3
  41. package/dist/terraform/modules/data/aurora-postgres/outputs.tf +34 -0
  42. package/dist/terraform/modules/data/aurora-postgres/variables.tf +16 -0
  43. package/dist/terraform/modules/data/compliance-audit-bucket/README.md +145 -0
  44. package/dist/terraform/modules/data/compliance-audit-bucket/main.tf +573 -0
  45. package/dist/terraform/modules/data/compliance-audit-bucket/outputs.tf +43 -0
  46. package/dist/terraform/modules/data/compliance-audit-bucket/variables.tf +93 -0
  47. package/dist/terraform/modules/data/compliance-exports-bucket/main.tf +269 -0
  48. package/dist/terraform/modules/data/compliance-exports-bucket/outputs.tf +23 -0
  49. package/dist/terraform/modules/data/compliance-exports-bucket/variables.tf +50 -0
  50. package/dist/terraform/modules/data/s3-backups-bucket/main.tf +123 -0
  51. package/dist/terraform/modules/data/s3-buckets/main.tf +13 -0
  52. package/dist/terraform/modules/foundation/cognito/variables.tf +2 -2
  53. package/dist/terraform/modules/thinkwork/main.tf +439 -21
  54. package/dist/terraform/modules/thinkwork/outputs.tf +121 -0
  55. package/dist/terraform/modules/thinkwork/variables.tf +153 -2
  56. package/dist/terraform/schema.graphql +17 -0
  57. package/package.json +15 -14
@@ -0,0 +1,406 @@
1
+ ################################################################################
2
+ # ThinkWork Computer Runtime — shared ECS/EFS substrate
3
+ #
4
+ # This module intentionally creates shared substrate only. Per-Computer EFS
5
+ # access points, task-definition revisions, and ECS services are reconciled by
6
+ # the Computer manager Lambda from database rows.
7
+ ################################################################################
8
+
9
+ locals {
10
+ task_subnet_ids = length(var.task_subnet_ids) > 0 ? var.task_subnet_ids : var.subnet_ids
11
+ assign_public_ip = var.assign_public_ip ? "ENABLED" : "DISABLED"
12
+ }
13
+
14
+ resource "aws_ecr_repository" "runtime" {
15
+ name = "thinkwork-${var.stage}-computer-runtime"
16
+ image_tag_mutability = "MUTABLE"
17
+
18
+ image_scanning_configuration {
19
+ scan_on_push = true
20
+ }
21
+
22
+ tags = { Name = "thinkwork-${var.stage}-computer-runtime" }
23
+ }
24
+
25
+ resource "aws_cloudwatch_log_group" "runtime" {
26
+ name = "/thinkwork/${var.stage}/computer-runtime"
27
+ retention_in_days = var.log_retention_days
28
+
29
+ tags = { Name = "thinkwork-${var.stage}-computer-runtime-logs" }
30
+ }
31
+
32
+ resource "aws_cloudwatch_log_group" "ecs_exec" {
33
+ name = "/thinkwork/${var.stage}/computer-ecs-exec"
34
+ retention_in_days = var.log_retention_days
35
+
36
+ tags = { Name = "thinkwork-${var.stage}-computer-ecs-exec-logs" }
37
+ }
38
+
39
+ resource "aws_ecs_cluster" "runtime" {
40
+ name = "thinkwork-${var.stage}-computer"
41
+
42
+ setting {
43
+ name = "containerInsights"
44
+ value = "enabled"
45
+ }
46
+
47
+ # ECS Exec audit-log destination. Per-session command streams land in this
48
+ # log group once enable_execute_command=true is set on the per-Computer
49
+ # service AND the task role has ssmmessages:Create/OpenControl+DataChannel.
50
+ # CloudTrail captures the ExecuteCommand API call separately. Plan:
51
+ # docs/plans/2026-05-13-004-feat-computer-terminal-ecs-exec-plan.md.
52
+ configuration {
53
+ execute_command_configuration {
54
+ logging = "OVERRIDE"
55
+ log_configuration {
56
+ cloud_watch_log_group_name = aws_cloudwatch_log_group.ecs_exec.name
57
+ }
58
+ }
59
+ }
60
+
61
+ tags = { Name = "thinkwork-${var.stage}-computer-cluster" }
62
+ }
63
+
64
+ resource "aws_efs_file_system" "workspace" {
65
+ creation_token = "thinkwork-${var.stage}-computer-workspaces"
66
+ encrypted = true
67
+
68
+ tags = { Name = "thinkwork-${var.stage}-computer-workspaces" }
69
+ }
70
+
71
+ resource "aws_security_group" "task" {
72
+ name_prefix = "thinkwork-${var.stage}-computer-task-"
73
+ description = "ThinkWork Computer runtime task egress and EFS client access"
74
+ vpc_id = var.vpc_id
75
+
76
+ egress {
77
+ from_port = 0
78
+ to_port = 0
79
+ protocol = "-1"
80
+ cidr_blocks = ["0.0.0.0/0"]
81
+ }
82
+
83
+ tags = { Name = "thinkwork-${var.stage}-computer-task-sg" }
84
+ lifecycle { create_before_destroy = true }
85
+ }
86
+
87
+ resource "aws_security_group" "efs" {
88
+ name_prefix = "thinkwork-${var.stage}-computer-efs-"
89
+ description = "ThinkWork Computer workspace EFS"
90
+ vpc_id = var.vpc_id
91
+
92
+ # No inline ingress blocks here: NFS ingress for this group is managed only
93
+ # by standalone aws_security_group_rule resources. The AWS provider does not
94
+ # support mixing inline rules with aws_security_group_rule on one group;
95
+ # doing so makes the two resources overwrite each other's rules.
96
+ tags = { Name = "thinkwork-${var.stage}-computer-efs-sg" }
97
+ lifecycle { create_before_destroy = true }
98
+ }
99
+
100
+ resource "aws_security_group_rule" "efs_from_task" {
101
+ description = "NFS from Computer runtime tasks"
102
+ type = "ingress"
103
+ from_port = 2049
104
+ to_port = 2049
105
+ protocol = "tcp"
106
+ security_group_id = aws_security_group.efs.id
107
+ source_security_group_id = aws_security_group.task.id
108
+ }
103
+
104
+ resource "aws_security_group" "vpc_endpoints" {
105
+ name_prefix = "thinkwork-${var.stage}-computer-vpce-"
106
+ description = "PrivateLink endpoints for Computer runtime image pulls and logs"
107
+ vpc_id = var.vpc_id
108
+
109
+ ingress {
110
+ description = "HTTPS from Computer runtime tasks"
111
+ from_port = 443
112
+ to_port = 443
113
+ protocol = "tcp"
114
+ security_groups = [aws_security_group.task.id]
115
+ }
116
+
117
+ tags = { Name = "thinkwork-${var.stage}-computer-vpce-sg" }
118
+ lifecycle { create_before_destroy = true }
119
+ }
120
+
121
+ data "aws_route_table" "computer_subnet" {
122
+ for_each = toset(var.subnet_ids)
123
+ subnet_id = each.value
124
+ }
125
+
126
+ resource "aws_vpc_endpoint" "interface" {
127
+ for_each = toset([
128
+ "bedrock-runtime",
129
+ "ecr.api",
130
+ "ecr.dkr",
131
+ "logs",
132
+ ])
133
+
134
+ vpc_id = var.vpc_id
135
+ service_name = "com.amazonaws.${var.region}.${each.value}"
136
+ vpc_endpoint_type = "Interface"
137
+ subnet_ids = var.subnet_ids
138
+ security_group_ids = [aws_security_group.vpc_endpoints.id]
139
+ private_dns_enabled = true
140
+
141
+ tags = { Name = "thinkwork-${var.stage}-computer-${replace(each.value, ".", "-")}-vpce" }
142
+ }
143
+
144
+ resource "aws_vpc_endpoint" "s3" {
145
+ vpc_id = var.vpc_id
146
+ service_name = "com.amazonaws.${var.region}.s3"
147
+ vpc_endpoint_type = "Gateway"
148
+ route_table_ids = distinct([for rt in data.aws_route_table.computer_subnet : rt.id])
149
+
150
+ tags = { Name = "thinkwork-${var.stage}-computer-s3-vpce" }
151
+ }
152
+
153
+ resource "aws_efs_mount_target" "workspace" {
154
+ for_each = toset(var.subnet_ids)
155
+
156
+ file_system_id = aws_efs_file_system.workspace.id
157
+ subnet_id = each.value
158
+ security_groups = [aws_security_group.efs.id]
159
+ }
160
+
161
+ # Shared access point that lets the admin `workspace-files-efs` Lambda read
162
+ # any Computer's workspace directly without going through the per-Computer
163
+ # runtime task queue. Per-Computer access points (created lazily by
164
+ # provisionComputerRuntime) chroot the runtime to a single
165
+ # /tenants/<tenantId>/computers/<computerId> subpath. This admin access point
166
+ # is rooted higher at /tenants so a single Lambda can resolve any
167
+ # (tenantId, computerId) at request time. uid/gid 1000 matches the PosixUser
168
+ # used for the per-Computer access points (see
169
+ # packages/api/src/lib/computers/runtime-control.ts:createAccessPoint), so the
170
+ # Lambda reads files the runtime wrote with consistent ownership.
171
+ resource "aws_efs_access_point" "workspace_admin" {
172
+ file_system_id = aws_efs_file_system.workspace.id
173
+
174
+ posix_user {
175
+ uid = 1000
176
+ gid = 1000
177
+ }
178
+
179
+ root_directory {
180
+ path = "/tenants"
181
+ creation_info {
182
+ owner_uid = 1000
183
+ owner_gid = 1000
184
+ permissions = "750"
185
+ }
186
+ }
187
+
188
+ tags = { Name = "thinkwork-${var.stage}-computer-workspaces-admin" }
189
+ }
190
+
191
+ # Lambda SG that mounts the shared workspace EFS. Reuses the same NFS allow-
192
+ # from rule pattern as the per-Computer task SG; created as a sibling so
193
+ # Lambda traffic is auditable separately from task traffic.
194
+ resource "aws_security_group" "workspace_admin_lambda" {
195
+ name_prefix = "thinkwork-${var.stage}-workspace-admin-lambda-"
196
+ description = "ThinkWork workspace-files-efs Lambda - EFS client"
197
+ vpc_id = var.vpc_id
198
+
199
+ egress {
200
+ from_port = 0
201
+ to_port = 0
202
+ protocol = "-1"
203
+ cidr_blocks = ["0.0.0.0/0"]
204
+ }
205
+
206
+ tags = { Name = "thinkwork-${var.stage}-workspace-admin-lambda-sg" }
207
+ lifecycle { create_before_destroy = true }
208
+ }
209
+
210
+ resource "aws_security_group_rule" "efs_from_workspace_admin_lambda" {
211
+ description = "NFS from workspace-files-efs Lambda"
212
+ type = "ingress"
213
+ from_port = 2049
214
+ to_port = 2049
215
+ protocol = "tcp"
216
+ security_group_id = aws_security_group.efs.id
217
+ source_security_group_id = aws_security_group.workspace_admin_lambda.id
218
+ }
219
+
220
+ resource "aws_iam_role" "execution" {
221
+ name = "thinkwork-${var.stage}-computer-execution"
222
+
223
+ assume_role_policy = jsonencode({
224
+ Version = "2012-10-17"
225
+ Statement = [{
226
+ Action = "sts:AssumeRole"
227
+ Effect = "Allow"
228
+ Principal = { Service = "ecs-tasks.amazonaws.com" }
229
+ }]
230
+ })
231
+ }
232
+
233
+ resource "aws_iam_role_policy_attachment" "execution" {
234
+ role = aws_iam_role.execution.name
235
+ policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
236
+ }
237
+
238
+ resource "aws_iam_role" "task" {
239
+ name = "thinkwork-${var.stage}-computer-task"
240
+
241
+ assume_role_policy = jsonencode({
242
+ Version = "2012-10-17"
243
+ Statement = [{
244
+ Action = "sts:AssumeRole"
245
+ Effect = "Allow"
246
+ Principal = { Service = "ecs-tasks.amazonaws.com" }
247
+ }]
248
+ })
249
+ }
250
+
251
+ resource "aws_iam_role_policy" "task_secrets" {
252
+ count = var.api_auth_secret_arn != "" ? 1 : 0
253
+ name = "computer-runtime-secret-read"
254
+ role = aws_iam_role.task.id
255
+
256
+ policy = jsonencode({
257
+ Version = "2012-10-17"
258
+ Statement = [{
259
+ Effect = "Allow"
260
+ Action = ["secretsmanager:GetSecretValue"]
261
+ Resource = var.api_auth_secret_arn
262
+ }]
263
+ })
264
+ }
265
+
266
+ resource "aws_iam_role_policy" "task_bedrock" {
267
+ name = "computer-runtime-bedrock"
268
+ role = aws_iam_role.task.id
269
+
270
+ policy = jsonencode({
271
+ Version = "2012-10-17"
272
+ Statement = [{
273
+ Effect = "Allow"
274
+ Action = [
275
+ "bedrock:InvokeModel",
276
+ "bedrock:InvokeModelWithResponseStream",
277
+ ]
278
+ Resource = [
279
+ "arn:aws:bedrock:*::foundation-model/*",
280
+ "arn:aws:bedrock:*:${var.account_id}:inference-profile/*",
281
+ ]
282
+ }]
283
+ })
284
+ }
285
+
286
+ resource "aws_iam_role_policy" "task_agentcore" {
287
+ name = "computer-runtime-agentcore"
288
+ role = aws_iam_role.task.id
289
+
290
+ policy = jsonencode({
291
+ Version = "2012-10-17"
292
+ Statement = [
293
+ {
294
+ Effect = "Allow"
295
+ Action = ["bedrock-agentcore:InvokeAgentRuntime"]
296
+ Resource = "arn:aws:bedrock-agentcore:${var.region}:${var.account_id}:runtime/*"
297
+ },
298
+ {
299
+ Effect = "Allow"
300
+ Action = ["ssm:GetParameter"]
301
+ Resource = "arn:aws:ssm:${var.region}:${var.account_id}:parameter/thinkwork/${var.stage}/agentcore/runtime-id-strands"
302
+ }
303
+ ]
304
+ })
305
+ }
306
+
307
+ resource "aws_iam_role_policy" "task_appsync" {
308
+ count = var.appsync_api_arn != "" ? 1 : 0
309
+ name = "computer-runtime-appsync"
310
+ role = aws_iam_role.task.id
311
+
312
+ policy = jsonencode({
313
+ Version = "2012-10-17"
314
+ Statement = [{
315
+ Effect = "Allow"
316
+ Action = ["appsync:GraphQL"]
317
+ Resource = "${var.appsync_api_arn}/types/Mutation/fields/publishComputerThreadChunk"
318
+ }]
319
+ })
320
+ }
321
+
322
+ # ECS Exec — lets the in-task SSM agent open a control + data channel back
323
+ # to ssmmessages. Required for the admin Terminal tab (browser MGS
324
+ # WebSocket terminates at ssmmessages, which fans out to the agent in the
325
+ # task via these channels). Plus CloudWatch Logs PutLogEvents so the
326
+ # per-session command transcript lands in the cluster's exec log group.
327
+ resource "aws_iam_role_policy" "task_ssm_messages" {
328
+ name = "computer-runtime-ssm-messages"
329
+ role = aws_iam_role.task.id
330
+
331
+ policy = jsonencode({
332
+ Version = "2012-10-17"
333
+ Statement = [
334
+ {
335
+ Effect = "Allow"
336
+ Action = [
337
+ "ssmmessages:CreateControlChannel",
338
+ "ssmmessages:CreateDataChannel",
339
+ "ssmmessages:OpenControlChannel",
340
+ "ssmmessages:OpenDataChannel",
341
+ ]
342
+ Resource = "*"
343
+ },
344
+ # DescribeLogGroups cannot be scoped to a single log group; the ECS Exec
345
+ # CloudWatch logging permissions documented by AWS grant it on "*"
346
+ # alongside the stream-level permissions below.
347
+ {
348
+ Effect = "Allow"
349
+ Action = ["logs:DescribeLogGroups"]
350
+ Resource = "*"
351
+ },
352
+ {
353
+ Effect = "Allow"
354
+ Action = [
355
+ "logs:CreateLogStream",
356
+ "logs:DescribeLogStreams",
357
+ "logs:PutLogEvents",
358
+ ]
359
+ Resource = "${aws_cloudwatch_log_group.ecs_exec.arn}:*"
360
+ },
361
+ ]
362
+ })
363
+ }
356
+
357
+ resource "aws_iam_policy" "manager" {
358
+ name = "thinkwork-${var.stage}-computer-manager"
359
+ description = "Allow the Computer manager Lambda to reconcile per-Computer ECS/EFS resources"
360
+
361
+ policy = jsonencode({
362
+ Version = "2012-10-17"
363
+ Statement = [
364
+ {
365
+ Effect = "Allow"
366
+ Action = [
367
+ "elasticfilesystem:CreateAccessPoint",
368
+ "elasticfilesystem:DescribeAccessPoints",
369
+ "elasticfilesystem:DeleteAccessPoint",
370
+ "elasticfilesystem:TagResource",
371
+ ]
372
+ Resource = [
373
+ aws_efs_file_system.workspace.arn,
374
+ "arn:aws:elasticfilesystem:${var.region}:${var.account_id}:access-point/*",
375
+ ]
376
+ },
377
+ {
378
+ Effect = "Allow"
379
+ Action = [
380
+ "ecs:CreateService",
381
+ "ecs:UpdateService",
382
+ "ecs:DeleteService",
383
+ "ecs:DescribeServices",
384
+ "ecs:DescribeTasks",
385
+ "ecs:ListTasks",
386
+ "ecs:RegisterTaskDefinition",
387
+ "ecs:DeregisterTaskDefinition",
388
+ "ecs:DescribeTaskDefinition",
389
+ # Admin Terminal tab — computer-terminal-start opens an
390
+ # interactive shell via ECS Exec. AWS scopes ExecuteCommand
391
+ # by task ARN; we leave Resource="*" because tasks are
392
+ # ephemeral and their ARNs are not known at policy-apply
393
+ # time. The Lambda's per-request authz (tenant-admin + the
394
+ # Computer's tenant_id match) is the actual gate.
395
+ "ecs:ExecuteCommand",
396
+ ]
397
+ Resource = "*"
398
+ },
399
+ {
400
+ Effect = "Allow"
401
+ Action = ["iam:PassRole"]
402
+ Resource = [aws_iam_role.execution.arn, aws_iam_role.task.arn]
403
+ },
404
+ ]
405
+ })
406
+ }
@@ -0,0 +1,75 @@
1
+ output "cluster_name" {
2
+ value = aws_ecs_cluster.runtime.name
3
+ }
4
+
5
+ output "cluster_arn" {
6
+ value = aws_ecs_cluster.runtime.arn
7
+ }
8
+
9
+ output "efs_file_system_id" {
10
+ value = aws_efs_file_system.workspace.id
11
+ }
12
+
13
+ output "efs_file_system_arn" {
14
+ value = aws_efs_file_system.workspace.arn
15
+ }
16
+
17
+ output "task_security_group_id" {
18
+ value = aws_security_group.task.id
19
+ }
20
+
21
+ output "efs_security_group_id" {
22
+ value = aws_security_group.efs.id
23
+ }
24
+
25
+ output "workspace_admin_access_point_arn" {
26
+ value = aws_efs_access_point.workspace_admin.arn
27
+ }
28
+
29
+ output "workspace_admin_access_point_id" {
30
+ value = aws_efs_access_point.workspace_admin.id
31
+ }
32
+
33
+ output "workspace_admin_lambda_sg_id" {
34
+ value = aws_security_group.workspace_admin_lambda.id
35
+ }
36
+
37
+ output "subnet_ids" {
38
+ value = var.subnet_ids
39
+ }
40
+
41
+ output "task_subnet_ids" {
42
+ value = local.task_subnet_ids
43
+ }
44
+
45
+ output "assign_public_ip" {
46
+ value = local.assign_public_ip
47
+ }
48
+
49
+ output "execution_role_arn" {
50
+ value = aws_iam_role.execution.arn
51
+ }
52
+
53
+ output "task_role_arn" {
54
+ value = aws_iam_role.task.arn
55
+ }
56
+
57
+ output "log_group_name" {
58
+ value = aws_cloudwatch_log_group.runtime.name
59
+ }
60
+
61
+ output "repository_url" {
62
+ value = aws_ecr_repository.runtime.repository_url
63
+ }
64
+
65
+ output "default_cpu" {
66
+ value = var.default_cpu
67
+ }
68
+
69
+ output "default_memory" {
70
+ value = var.default_memory
71
+ }
72
+
73
+ output "manager_policy_arn" {
74
+ value = aws_iam_policy.manager.arn
75
+ }
@@ -0,0 +1,66 @@
1
+ variable "stage" {
2
+ description = "Deployment stage"
3
+ type = string
4
+ }
5
+
6
+ variable "account_id" {
7
+ description = "AWS account ID"
8
+ type = string
9
+ }
10
+
11
+ variable "region" {
12
+ description = "AWS region"
13
+ type = string
14
+ }
15
+
16
+ variable "vpc_id" {
17
+ description = "VPC ID for Computer runtime tasks and EFS"
18
+ type = string
19
+ }
20
+
21
+ variable "subnet_ids" {
22
+ description = "Private subnet IDs for Computer runtime EFS mount targets and VPC endpoints"
23
+ type = list(string)
24
+ }
25
+
26
+ variable "task_subnet_ids" {
27
+ description = "Subnet IDs for Computer runtime ECS tasks. Defaults to subnet_ids."
28
+ type = list(string)
29
+ default = []
30
+ }
31
+
32
+ variable "assign_public_ip" {
33
+ description = "Whether Computer runtime ECS tasks receive a public IP for direct outbound internet access."
34
+ type = bool
35
+ default = false
36
+ }
37
+
38
+ variable "api_auth_secret_arn" {
39
+ description = "Optional Secrets Manager ARN for THINKWORK_API_SECRET injection. Empty keeps runtime secret wiring deferred to the manager."
40
+ type = string
41
+ default = ""
42
+ }
43
+
44
+ variable "appsync_api_arn" {
45
+ description = "Optional AppSync API ARN for Computer runtime streaming publishes."
46
+ type = string
47
+ default = ""
48
+ }
49
+
50
+ variable "default_cpu" {
51
+ description = "Default Fargate CPU units for one Computer runtime task"
52
+ type = number
53
+ default = 256
54
+ }
55
+
56
+ variable "default_memory" {
57
+ description = "Default Fargate memory MB for one Computer runtime task"
58
+ type = number
59
+ default = 512
60
+ }
61
+
62
+ variable "log_retention_days" {
63
+ description = "CloudWatch log retention for Computer runtime tasks"
64
+ type = number
65
+ default = 7
66
+ }
@@ -267,6 +267,12 @@ resource "aws_ecs_task_definition" "hindsight" {
267
267
  { name = "HINDSIGHT_API_EMBEDDINGS_PROVIDER", value = "local" },
268
268
  { name = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL", value = "BAAI/bge-small-en-v1.5" },
269
269
  { name = "HINDSIGHT_API_RERANKER_PROVIDER", value = "local" },
270
+ { name = "HINDSIGHT_API_RERANKER_MAX_CANDIDATES", value = "20" },
271
+ { name = "HINDSIGHT_API_RERANKER_LOCAL_BUCKET_BATCHING", value = "true" },
272
+ { name = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT", value = "1" },
273
+ { name = "HINDSIGHT_API_RECALL_BUDGET_FUNCTION", value = "adaptive" },
274
+ { name = "HINDSIGHT_API_RECALL_BUDGET_MIN", value = "5" },
275
+ { name = "HINDSIGHT_API_RECALL_BUDGET_MAX", value = "300" },
270
276
  ]
271
277
 
272
278
  logConfiguration = {
@@ -0,0 +1,128 @@
1
+ # ---------------------------------------------------------------------------
2
+ # Evals per-case fan-out substrate
3
+ #
4
+ # eval-runner dispatches one message per test case. eval-worker catches
5
+ # application-level case failures and writes eval_results.status='error';
6
+ # infrastructure failures redrive through SQS to the DLQ after maxReceiveCount=5.
7
+ # ---------------------------------------------------------------------------
8
+
9
+ resource "aws_sqs_queue" "eval_fanout_dlq" {
10
+ count = local.use_local_zips ? 1 : 0
11
+ name = "thinkwork-${var.stage}-eval-fanout-dlq.fifo"
12
+ fifo_queue = true
13
+ message_retention_seconds = 1209600 # 14 days
14
+ sqs_managed_sse_enabled = true
15
+
16
+ tags = {
17
+ Name = "thinkwork-${var.stage}-eval-fanout-dlq.fifo"
18
+ }
19
+ }
20
+
21
+ resource "aws_sqs_queue" "eval_fanout" {
22
+ count = local.use_local_zips ? 1 : 0
23
+ name = "thinkwork-${var.stage}-eval-fanout.fifo"
24
+ fifo_queue = true
25
+ content_based_deduplication = true
26
+ visibility_timeout_seconds = 300
27
+ message_retention_seconds = 86400 # 1 day; DLQ holds longer-stuck messages
28
+ sqs_managed_sse_enabled = true
29
+
30
+ redrive_policy = jsonencode({
31
+ deadLetterTargetArn = aws_sqs_queue.eval_fanout_dlq[0].arn
32
+ maxReceiveCount = 5
33
+ })
34
+
35
+ tags = {
36
+ Name = "thinkwork-${var.stage}-eval-fanout.fifo"
37
+ }
38
+ }
39
+
40
+ resource "aws_iam_role_policy" "eval_fanout_send" {
41
+ count = local.use_local_zips ? 1 : 0
42
+ name = "eval-fanout-send"
43
+ role = aws_iam_role.lambda.id
44
+
45
+ policy = jsonencode({
46
+ Version = "2012-10-17"
47
+ Statement = [{
48
+ Sid = "EvalRunnerSendFanoutMessages"
49
+ Effect = "Allow"
50
+ Action = [
51
+ "sqs:SendMessage",
52
+ "sqs:SendMessageBatch",
53
+ ]
54
+ Resource = aws_sqs_queue.eval_fanout[0].arn
55
+ }]
56
+ })
57
+ }
58
+
59
+ resource "aws_iam_role_policy" "eval_worker_sqs" {
60
+ count = local.use_local_zips ? 1 : 0
61
+ name = "eval-worker-sqs"
62
+ role = aws_iam_role.lambda.id
63
+
64
+ policy = jsonencode({
65
+ Version = "2012-10-17"
66
+ Statement = [
67
+ {
68
+ Sid = "EvalWorkerReceiveFanoutMessages"
69
+ Effect = "Allow"
70
+ Action = [
71
+ "sqs:ReceiveMessage",
72
+ "sqs:DeleteMessage",
73
+ "sqs:GetQueueAttributes",
74
+ "sqs:ChangeMessageVisibility",
75
+ ]
76
+ Resource = aws_sqs_queue.eval_fanout[0].arn
77
+ },
78
+ {
79
+ Sid = "EvalWorkerSendDlqMessages"
80
+ Effect = "Allow"
81
+ Action = ["sqs:SendMessage"]
82
+ Resource = aws_sqs_queue.eval_fanout_dlq[0].arn
83
+ },
84
+ ]
85
+ })
86
+ }
87
+
88
+ resource "aws_lambda_event_source_mapping" "eval_fanout" {
89
+ count = local.use_local_zips ? 1 : 0
90
+
91
+ event_source_arn = aws_sqs_queue.eval_fanout[0].arn
92
+ function_name = aws_lambda_function.handler["eval-worker"].function_name
93
+ batch_size = 1
94
+ enabled = true
95
+ function_response_types = ["ReportBatchItemFailures"]
96
+
97
+ scaling_config {
98
+ maximum_concurrency = 20
99
+ }
100
+ }
101
+
102
+ resource "aws_lambda_function_event_invoke_config" "eval_worker" {
103
+ count = local.use_local_zips ? 1 : 0
104
+
105
+ function_name = aws_lambda_function.handler["eval-worker"].function_name
106
+ maximum_event_age_in_seconds = 3600
107
+ maximum_retry_attempts = 0
108
+ }
109
+
110
+ resource "aws_cloudwatch_metric_alarm" "eval_fanout_dlq_depth" {
111
+ count = local.use_local_zips ? 1 : 0
112
+
113
+ alarm_name = "thinkwork-${var.stage}-eval-fanout-dlq-depth"
114
+ alarm_description = "Eval fan-out DLQ has messages — eval-worker crashed before recording a case result; operator must inspect."
115
+ namespace = "AWS/SQS"
116
+ metric_name = "ApproximateNumberOfMessagesVisible"
117
+ statistic = "Maximum"
118
+ period = 60
119
+ evaluation_periods = 1
120
+ threshold = 1
121
+ comparison_operator = "GreaterThanOrEqualToThreshold"
122
+ treat_missing_data = "notBreaching"
123
+ alarm_actions = []
124
+
125
+ dimensions = {
126
+ QueueName = aws_sqs_queue.eval_fanout_dlq[0].name
127
+ }
128
+ }