npm - thinkwork-cli - Versions diffs - 0.9.0 → 0.9.1 - Mend

thinkwork-cli 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

package/dist/terraform/modules/app/routines-stepfunctions/main.tf ADDED Viewed

@@ -0,0 +1,453 @@
+################################################################################
+# Routines Step Functions — App Module (stage-level)
+#
+# Stage-scoped substrate for the Step Functions Routines runtime:
+#   * Execution role (one per stage, shared by ALL routines; not tenant-tagged).
+#   * CloudWatch log group for state machine execution histories.
+#   * S3 bucket for python() recipe output offload (mandatory due to the
+#     256KB ASL state-payload + 25K-event execution-history caps).
+#
+# **State machines themselves are NOT created here.** Each Routine
+# provisions its own state machine via the createRoutine GraphQL
+# resolver (Phase B U7) using the role + log group exported by this
+# module. This mirrors the agentcore-code-interpreter pattern: stage
+# substrate in Terraform, per-resource fan-out at runtime.
+#
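+# **Tenant isolation is application-layer for now.** The execution role's
+# inline policies carry no tag conditions: the shared role has no tenantId
+# principal tag, so aws:PrincipalTag/tenantId vs. aws:ResourceTag/tenantId
+# matching cannot work without session tags (see the notes above
+# aws_iam_role_policy.execution_states). Each state machine is still created
+# with the tenantId/agentId/routineId tags; cross-tenant access is blocked at
+# the GraphQL resolver layer rather than the IAM layer.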
+################################################################################
+terraform {
+  # This module only needs the HashiCorp AWS provider, major version 5 or newer.
+  required_providers {
+    aws = {
+      version = ">= 5.0"
+      source  = "hashicorp/aws"
+    }
+  }
+}
+# --- Module inputs -----------------------------------------------------------
+# stage / region / account_id are required; the remaining inputs have defaults.
+variable "stage" {
+  type        = string
+  description = "Deployment stage (dev, prod, etc.)."
+}
+variable "region" {
+  type        = string
+  description = "AWS region."
+}
+variable "account_id" {
+  type        = string
+  description = "AWS account ID (used to construct IAM resource ARNs)."
+}
+variable "log_retention_days" {
+  type        = number
+  default     = 30
+  description = "CloudWatch log retention for routine state machine executions."
+}
+# Empty string (the default) disables the EventBridge rule, target and Lambda
+# permission declared at the bottom of this module.
+variable "execution_callback_lambda_arn" {
+  type        = string
+  default     = ""
+  description = "ARN of the routine-execution-callback Lambda. EventBridge sends SFN execution-state-change events here so routine_executions lifecycle status mirrors the SFN-side reality. When empty, the EventBridge rule is not provisioned (Phase B U9 hasn't deployed yet)."
+}
+locals {
+  # Stage-derived names shared by the resources below.
+  role_name      = format("thinkwork-%s-routines-execution-role", var.stage)
+  output_bucket  = format("thinkwork-%s-routine-output", var.stage)
+  log_group_name = format("/aws/vendedlogs/states/thinkwork-%s-routines", var.stage)
+}
+################################################################################
+# CloudWatch Log Group
+#
+# Uses the /aws/vendedlogs/states/ prefix to dodge the resource-policy size
+# cap when the tenant accumulates hundreds of state machines.
+################################################################################
+resource "aws_cloudwatch_log_group" "routines" {
+  # One log group per stage; retention is caller-configurable.
+  retention_in_days = var.log_retention_days
+  name              = local.log_group_name
+  tags = {
+    Purpose = "step-functions-routines"
+    Stage   = var.stage
+    Name    = local.log_group_name
+  }
+}
+################################################################################
+# S3 bucket — python() recipe output offload
+#
+# Path layout: s3://<bucket>/<tenantId>/<executionArn>/<nodeId>/{stdout,stderr}.log
+# The python() Task wrapper writes here; the run-detail surface signs URLs
+# server-side with tenant-scoped IAM and short expirations.
+################################################################################
+resource "aws_s3_bucket" "routine_output" {
+  # Bucket name is derived from the stage (see locals).
+  bucket = local.output_bucket
+  tags = {
+    Purpose = "routine-python-output"
+    Stage   = var.stage
+    Name    = local.output_bucket
+  }
+}
+resource "aws_s3_bucket_public_access_block" "routine_output" {
+  bucket = aws_s3_bucket.routine_output.id
+  # All four public-access guards are switched on.
+  restrict_public_buckets = true
+  ignore_public_acls      = true
+  block_public_policy     = true
+  block_public_acls       = true
+}
+resource "aws_s3_bucket_server_side_encryption_configuration" "routine_output" {
+  bucket = aws_s3_bucket.routine_output.id
+  # Default encryption at rest uses the AES256 algorithm.
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
+resource "aws_s3_bucket_versioning" "routine_output" {
+  bucket = aws_s3_bucket.routine_output.id
+  # Object versioning is explicitly left off for this bucket.
+  versioning_configuration {
+    status = "Disabled"
+  }
+}
+################################################################################
+# Step Functions Execution Role — single role for all routines
+#
+# Trust: states.amazonaws.com.
+# ABAC: not enforced by the inline policies below (none carries a tag
+# condition); see the notes above aws_iam_role_policy.execution_states.
+################################################################################
+resource "aws_iam_role" "execution" {
+  name = local.role_name
+  # Trust policy: only the Step Functions service principal may assume this
+  # role, and only on behalf of the account passed in var.account_id.
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Action    = "sts:AssumeRole"
+        Effect    = "Allow"
+        Principal = { Service = "states.amazonaws.com" }
+        Condition = {
+          StringEquals = { "aws:SourceAccount" = var.account_id }
+        }
+      },
+    ]
+  })
+  tags = {
+    Purpose = "step-functions-routines-execution"
+    Stage   = var.stage
+    Name    = local.role_name
+  }
+}
+# CloudWatch logs (X-Ray + CloudWatch Logs Delivery for Step Functions).
+# Step Functions requires these specific actions to populate execution
+# history into the vendedlogs prefix.
+resource "aws_iam_role_policy" "execution_logs" {
+  role = aws_iam_role.execution.id
+  name = "logs-and-xray"
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        # Log-delivery management actions, granted on all resources.
+        Effect   = "Allow"
+        Resource = "*"
+        Action = [
+          "logs:CreateLogDelivery",
+          "logs:GetLogDelivery",
+          "logs:UpdateLogDelivery",
+          "logs:DeleteLogDelivery",
+          "logs:ListLogDeliveries",
+          "logs:PutResourcePolicy",
+          "logs:DescribeResourcePolicies",
+          "logs:DescribeLogGroups",
+        ]
+      },
+      {
+        # Stream creation and event writes, limited to this module's log group.
+        Effect   = "Allow"
+        Resource = "${aws_cloudwatch_log_group.routines.arn}:*"
+        Action = [
+          "logs:CreateLogStream",
+          "logs:PutLogEvents",
+        ]
+      },
+      {
+        # X-Ray tracing actions, granted on all resources.
+        Effect   = "Allow"
+        Resource = "*"
+        Action = [
+          "xray:PutTraceSegments",
+          "xray:PutTelemetryRecords",
+          "xray:GetSamplingRules",
+          "xray:GetSamplingTargets",
+        ]
+      },
+    ]
+  })
+}
+# Lambda invocation — scoped to the routine task wrapper Lambdas. The
+# wrappers themselves enforce per-tenant authorization (validating the
+# routine_id + execution_arn against the DB) so wildcard here is
+# acceptable; tightening to specific function ARNs causes a chicken-and-
+# egg with the lambda-api module which depends on outputs from this one.
+resource "aws_iam_role_policy" "execution_lambda_invoke" {
+  role = aws_iam_role.execution.id
+  name = "lambda-invoke-task-wrappers"
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = ["lambda:InvokeFunction"]
+        # Every invocable function shares the
+        # thinkwork-<stage>-api- name prefix; only the suffix varies. The
+        # list order is preserved so the rendered policy is unchanged.
+        Resource = [
+          for fn_suffix in [
+            "routine-task-*",
+            "routine-resume",
+            "routine-approval-callback",
+            "email-send",
+            "admin-ops-mcp",
+            "slack-send",
+          ] :
+          "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-${fn_suffix}"
+        ]
+      },
+    ]
+  })
+}
+# Bedrock AgentCore — code interpreter (python() recipe) and agent runtime
+# (agent_invoke recipe via aws-sdk:bedrockagentcore:invokeAgentRuntime
+# direct integration).
+resource "aws_iam_role_policy" "execution_bedrock_agentcore" {
+  role = aws_iam_role.execution.id
+  name = "bedrock-agentcore"
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        # Code interpreter session lifecycle: start, invoke, stop.
+        Effect   = "Allow"
+        Resource = "*"
+        Action = [
+          "bedrock-agentcore:StartCodeInterpreterSession",
+          "bedrock-agentcore:InvokeCodeInterpreter",
+          "bedrock-agentcore:StopCodeInterpreterSession",
+        ]
+      },
+      {
+        # Agent runtime invocation.
+        Effect   = "Allow"
+        Resource = "*"
+        Action   = ["bedrock-agentcore:InvokeAgentRuntime"]
+      },
+    ]
+  })
+}
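+# Secrets Manager — for routines that reach for secrets (e.g. API tokens)
+# via the python() recipe. The grant is path-scoped per stage
+# (thinkwork/<stage>/routines/*), not per tenant: this policy alone does
+# not separate one tenant's secrets from another's.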
+resource "aws_iam_role_policy" "execution_secrets" {
+  role = aws_iam_role.execution.id
+  name = "secrets-manager"
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        # Read-only access to secrets under thinkwork/<stage>/routines/.
+        Effect   = "Allow"
+        Resource = "arn:aws:secretsmanager:${var.region}:${var.account_id}:secret:thinkwork/${var.stage}/routines/*"
+        Action = [
+          "secretsmanager:GetSecretValue",
+          "secretsmanager:DescribeSecret",
+        ]
+      },
+    ]
+  })
+}
+# Step Functions self-invocation — for the routine_invoke recipe (one
+# routine calling another via startExecution.sync:2). Plus task-token
+# completion (SendTaskSuccess / SendTaskFailure) needed by the inbox-
+# approval bridge that runs out-of-band from this state machine.
+#
+# **ABAC tenant isolation is application-layer in Phase A.** A previous
+# revision had an ABAC tag-condition on StartExecution
+# (aws:ResourceTag/tenantId vs aws:PrincipalTag/tenantId). The shared
+# execution role has no tenantId principal tag, so the condition resolved
+# to '' on every invocation and denied all sub-routine calls. True ABAC
+# for a shared role requires session tags via sts:AssumeRole + TagSession
+# (Phase B follow-up — see review residuals). For now tenant isolation
+# lives at the GraphQL resolver layer (routine_id auth check + tenant_id
+# FK), matching the rest of the platform's tenant-scoped resources.
+resource "aws_iam_role_policy" "execution_states" {
+  name = "states-self-invoke"
+  role = aws_iam_role.execution.id
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        # StartExecution operates on the stateMachine ARN.
+        Effect = "Allow"
+        Action = [
+          "states:StartExecution",
+        ]
+        Resource = "arn:aws:states:${var.region}:${var.account_id}:stateMachine:thinkwork-${var.stage}-routine-*"
+      },
+      {
+        # DescribeExecution and StopExecution operate on the execution
+        # ARN, not the stateMachine ARN. The startExecution.sync:2
+        # integration needs both to track and cancel the child execution;
+        # without this statement the routine_invoke recipe would fail at
+        # runtime with an implicit IAM deny.
+        Effect = "Allow"
+        Action = [
+          "states:DescribeExecution",
+          "states:StopExecution",
+        ]
+        Resource = "arn:aws:states:${var.region}:${var.account_id}:execution:thinkwork-${var.stage}-routine-*:*"
+      },
+      {
+        # The synchronous (.sync / .sync:2) nested-execution pattern relies
+        # on a Step Functions managed EventBridge rule to learn when the
+        # child execution finishes. AWS documents these three actions on
+        # that rule as required for the pattern; a state machine that uses
+        # startExecution.sync:2 with a role lacking them is rejected with
+        # "not authorized to create managed-rule".
+        Effect = "Allow"
+        Action = [
+          "events:PutTargets",
+          "events:PutRule",
+          "events:DescribeRule",
+        ]
+        Resource = "arn:aws:events:${var.region}:${var.account_id}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule"
+      },
+      {
+        # Task-token completion and heartbeat calls, granted on all
+        # resources.
+        Effect = "Allow"
+        Action = [
+          "states:SendTaskSuccess",
+          "states:SendTaskFailure",
+          "states:SendTaskHeartbeat",
+        ]
+        Resource = "*"
+      },
+    ]
+  })
+}
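+# S3 — read/write access to the python() output bucket. The grant covers
+# every key in the bucket; the <tenantId>/ prefix layout is a naming
+# convention, not an IAM boundary.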
+resource "aws_iam_role_policy" "execution_s3_output" {
+  role = aws_iam_role.execution.id
+  name = "s3-routine-output"
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        # Object-level put/get on any key in the output bucket.
+        Effect   = "Allow"
+        Resource = "${aws_s3_bucket.routine_output.arn}/*"
+        Action = [
+          "s3:PutObject",
+          "s3:GetObject",
+        ]
+      },
+      {
+        # Bucket-level listing.
+        Effect   = "Allow"
+        Resource = aws_s3_bucket.routine_output.arn
+        Action   = ["s3:ListBucket"]
+      },
+    ]
+  })
+}
+################################################################################
+# Outputs
+################################################################################
+# -- Execution role -----------------------------------------------------------
+output "execution_role_arn" {
+  value       = aws_iam_role.execution.arn
+  description = "ARN of the Step Functions execution role (assumed by all routine state machines)."
+}
+output "execution_role_name" {
+  value       = aws_iam_role.execution.name
+  description = "Name of the Step Functions execution role."
+}
+# -- Log group ----------------------------------------------------------------
+output "log_group_arn" {
+  value       = aws_cloudwatch_log_group.routines.arn
+  description = "CloudWatch log group ARN for state machine execution histories."
+}
+output "log_group_name" {
+  value       = aws_cloudwatch_log_group.routines.name
+  description = "CloudWatch log group name."
+}
+# -- Output bucket ------------------------------------------------------------
+output "output_bucket_name" {
+  value       = aws_s3_bucket.routine_output.bucket
+  description = "S3 bucket for python() recipe stdout/stderr offload."
+}
+output "output_bucket_arn" {
+  value       = aws_s3_bucket.routine_output.arn
+  description = "S3 bucket ARN for python() recipe stdout/stderr offload."
+}
+# -- Pass-through -------------------------------------------------------------
+output "stage" {
+  value       = var.stage
+  description = "Echo of the stage variable (convenience for downstream modules)."
+}
+################################################################################
+# EventBridge → routine-execution-callback (Phase B U9)
+#
+# AWS EventBridge fires `Step Functions Execution Status Change` events on
+# every status transition (RUNNING/SUCCEEDED/FAILED/TIMED_OUT/ABORTED). The
+# routine-execution-callback Lambda speaks both API Gateway POST shape and
+# this EventBridge shape; the Lambda detects the EventBridge event by
+# `source === "aws.states"` and adapts.
+#
+# The rule is gated on var.execution_callback_lambda_arn being set so the
+# substrate module remains provisionable before the lambda-api module
+# defines the function. The thinkwork composite module wires the ARN
+# back in once both modules apply.
+################################################################################
+resource "aws_cloudwatch_event_rule" "sfn_state_change" {
+  # Created only when a callback Lambda ARN has been supplied.
+  count       = var.execution_callback_lambda_arn == "" ? 0 : 1
+  description = "Forward SFN execution-state-change events to routine-execution-callback so routine_executions tracks lifecycle status."
+  name        = "thinkwork-${var.stage}-routines-sfn-state-change"
+  event_pattern = jsonencode({
+    "detail-type" = ["Step Functions Execution Status Change"]
+    source        = ["aws.states"]
+    detail = {
+      # Match only state machines whose ARN starts with this stage's
+      # thinkwork-<stage>-routine- name prefix.
+      stateMachineArn = [
+        { prefix = "arn:aws:states:${var.region}:${var.account_id}:stateMachine:thinkwork-${var.stage}-routine-" },
+      ]
+    }
+  })
+  tags = {
+    Stage = var.stage
+    Name  = "thinkwork-${var.stage}-routines-sfn-state-change"
+  }
+}
+resource "aws_cloudwatch_event_target" "sfn_state_change" {
+  # Same gate as the rule: nothing to target when no Lambda ARN is given.
+  count     = var.execution_callback_lambda_arn == "" ? 0 : 1
+  arn       = var.execution_callback_lambda_arn
+  target_id = "routine-execution-callback"
+  rule      = aws_cloudwatch_event_rule.sfn_state_change[0].name
+}
+resource "aws_lambda_permission" "sfn_state_change" {
+  # Same gate as the rule. Lets EventBridge invoke the callback Lambda, but
+  # only for events coming from this module's rule.
+  count         = var.execution_callback_lambda_arn == "" ? 0 : 1
+  function_name = var.execution_callback_lambda_arn
+  action        = "lambda:InvokeFunction"
+  principal     = "events.amazonaws.com"
+  source_arn    = aws_cloudwatch_event_rule.sfn_state_change[0].arn
+  statement_id  = "AllowEventBridgeInvokeRoutineExecutionCallback"
+}

package/dist/terraform/modules/app/sandbox-log-scrubber/README.md ADDED Viewed

@@ -0,0 +1,66 @@
+# `sandbox-log-scrubber` — R13 CloudWatch backstop
+**Secondary** scrubber for the AgentCore Code Interpreter sandbox. Pattern-
+redacts known-shape OAuth tokens in AgentCore `APPLICATION_LOGS` before they
+land in the long-term CloudWatch tier. See plan Unit 12 +
+`docs/brainstorms/2026-04-22-agentcore-code-sandbox-requirements.md` R13.
+## Relationship to the primary scrubber
+The primary layer is the base-image `sitecustomize.py` stdio wrapper shipped
+by [`terraform/modules/app/agentcore-code-interpreter`](../agentcore-code-interpreter/)
+(plan Unit 4). That layer redacts by **value** — session-scoped token
+strings registered by the preamble — and can therefore catch any token the
+agent's own preamble has handled.
+This backstop redacts by **pattern**: it has no access to session values,
+only to the bytes that reach CloudWatch. It exists to mitigate *stdio-bypass
+classes* named in R13's residual list:
+- `subprocess.run(['env'])`, `subprocess.run(['cat', '/proc/self/environ'])`
+- `os.write(fd, ...)` at the file-descriptor level
+- C-extension writes to fd 1 directly
+- `multiprocessing` workers with fresh Python interpreters where the
+  session token set hasn't been populated
+- Adversarial split-writes that fragment a token across more bytes than the
+  rolling-buffer window
+When those bytes carry a token whose *shape* is recognizable, this backstop
+catches it. When they don't (bespoke token formats, high-entropy opaque
+strings with no prefix), only the v2 in-process credential proxy can help.
+## Pattern set
+Defined in `packages/lambda/sandbox-log-scrubber.ts`; add to it before
+relying on new token shapes in production:
+- `Authorization:\s*Bearer\s+<opaque-run>`
+- JWT — three dot-separated base64url segments (16 chars min per segment)
+- `gh[oprsu]_<A-Za-z0-9>{20,}` — GitHub PATs, OAuth, server-to-server
+- `xox[abep]-<A-Za-z0-9->{10,}` — Slack bot / app / user / export
+- `ya29.<A-Za-z0-9_->{20,}` — Google short-lived OAuth
+## What it creates
+- `/thinkwork/{stage}/sandbox/scrubbed` CloudWatch log group (90-day retention)
+- IAM execution role `thinkwork-{stage}-sandbox-log-scrubber`
+- Lambda function `thinkwork-{stage}-sandbox-log-scrubber` (node20, 256MB, 30s)
+- Subscription filter on the source log group — delivers every event (no filter pattern)
+- Invoke permission from CloudWatch Logs to the Lambda
+## Failure tolerance
+If the scrubber Lambda fails — OOM, bug, cold-start timeout — source events
+remain in the original CloudWatch log group. S3 export of the scrubbed group
+is delayed, but no data is lost. The source group retains events for as long
+as its own retention policy specifies (typically 90 days).
+## Build + wire
+1. `bash scripts/build-lambdas.sh sandbox-log-scrubber` — produces
+   `dist/lambdas/sandbox-log-scrubber.zip`.
+2. In the caller module (e.g., `terraform/examples/greenfield`), pass
+   `lambda_zip_path` and `lambda_zip_hash` (typically `filebase64sha256()`
+   of the zip).
+3. Pass `source_log_group_name` pointing at the AgentCore runtime group
+   emitting APPLICATION_LOGS for the stage.