npm - thinkwork-cli - Versions diffs - 0.8.2 → 0.9.0 - Mend

thinkwork-cli 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md +16 -0
package/dist/cli.js +1927 -10
package/dist/terraform/modules/app/agentcore-runtime/main.tf +39 -4
package/dist/terraform/modules/app/lambda-api/handlers.tf +112 -8
package/dist/terraform/modules/app/lambda-api/main.tf +78 -3
package/dist/terraform/modules/foundation/cognito/variables.tf +5 -2
package/dist/terraform/modules/thinkwork/variables.tf +12 -4
package/dist/terraform/schema.graphql +28 -0
package/package.json +1 -1

package/dist/terraform/modules/app/agentcore-runtime/main.tf CHANGED Viewed

@@ -154,10 +154,45 @@ resource "aws_iam_role_policy" "agentcore" {
         Resource = "*"
       },
       {
-        Sid      = "CloudWatchLogs"
-        Effect   = "Allow"
-        Action   = ["logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents"]
-        Resource = "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/lambda/thinkwork-${var.stage}-*"
+        Sid    = "CloudWatchLogs"
+        Effect = "Allow"
+        Action = [
+          "logs:CreateLogGroup",
+          "logs:CreateLogStream",
+          "logs:DescribeLogGroups",
+          "logs:DescribeLogStreams",
+          "logs:PutLogEvents",
+        ]
+        # Lambda log group + AgentCore Runtime container log groups + the
+        # account-wide aws/spans log group (CloudWatch Transaction Search
+        # destination — required for AgentCore Evaluations to read spans).
+        # Each entry is doubled with `:*` so log-STREAM operations are
+        # allowed (log-group ARN without `:*` covers group-level ops only).
+        Resource = [
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/lambda/thinkwork-${var.stage}-*",
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/lambda/thinkwork-${var.stage}-*:*",
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/bedrock-agentcore/runtimes/*",
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/bedrock-agentcore/runtimes/*:*",
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:aws/spans",
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:aws/spans:*",
+        ]
+      },
+      {
+        # X-Ray ingestion — ADOT exporters publish spans here, which then
+        # flow to aws/spans via the Transaction Search policy. AgentCore
+        # Evaluations queries those spans by session.id when scoring runs.
+        Sid    = "XRayIngest"
+        Effect = "Allow"
+        Action = [
+          "xray:PutTraceSegments",
+          "xray:PutTelemetryRecords",
+          "xray:GetSamplingRules",
+          "xray:GetSamplingTargets",
+        ]
+        # These X-Ray actions do not support resource-level permissions, so
+        # "*" is the only effective value. A region/account-scoped X-Ray ARN
+        # listed next to "*" grants nothing extra and only suggests scoping
+        # that is not enforced, so the statement uses the wildcard alone.
+        Resource = "*"
       },
       {
         Sid      = "ECRPull"

package/dist/terraform/modules/app/lambda-api/handlers.tf CHANGED Viewed

@@ -51,7 +51,7 @@ locals {
     # path. When unset, syncExternalTaskOnCreate writes sync_status='local'
     # and the workflow picker proxy returns 503. Set to the LMI develop /
     # staging / prod base URL per stage to enable real cross-system sync.
-    LASTMILE_TASKS_API_URL   = var.lastmile_tasks_api_url
+    LASTMILE_TASKS_API_URL = var.lastmile_tasks_api_url
   }
   # Per-handler env-var overrides. ARNs are constructed from the naming
@@ -62,6 +62,15 @@ locals {
       JOB_TRIGGER_ARN      = "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-job-trigger"
       JOB_TRIGGER_ROLE_ARN = var.job_scheduler_role_arn
     }
+    # Compounding Memory compile Lambda. Claude Haiku 4.5 via Bedrock; the
+    # planner + section-writer cap themselves at ~500 records / 25 new pages
+    # per invocation. The matching 480 s timeout is not set here: it lives in
+    # the `timeout` expression of aws_lambda_function.handler ("wiki-compile").
+    "wiki-compile" = {
+      BEDROCK_MODEL_ID = "us.anthropic.claude-haiku-4-5-20251001-v1:0"
+    }
+    "wiki-export" = {
+      WIKI_EXPORT_BUCKET = aws_s3_bucket.wiki_exports.bucket
+    }
   }
 }
@@ -96,7 +105,6 @@ resource "aws_lambda_function" "handler" {
     "webhooks",
     "webhooks-admin",
     "webhook-deliveries-cleanup",
-    "task-connectors",
     "workspace-files",
     "knowledge-base-manager",
     "knowledge-base-files",
@@ -106,19 +114,30 @@ resource "aws_lambda_function" "handler" {
     "github-repos",
     "memory",
     "memory-retain",
+    "wiki-compile",
+    "wiki-lint",
+    "wiki-export",
+    "wiki-bootstrap-import",
     "artifact-deliver",
     "recipe-refresh",
     "agent-skills-list",
     "bootstrap-workspaces",
     "code-factory",
+    "eval-runner",
   ]) : toset([])
   function_name = "thinkwork-${var.stage}-api-${each.key}"
   role          = aws_iam_role.lambda.arn
   handler       = "index.handler"
   runtime       = local.runtime
-  timeout       = each.key == "wakeup-processor" ? 300 : each.key == "chat-agent-invoke" ? 300 : 30
-  memory_size   = each.key == "graphql-http" ? 512 : each.key == "wakeup-processor" ? 512 : 256
+  # eval-runner walks every test case sequentially, invoking an agent +
+  # waiting up to 2 min for spans to propagate per test, so a 10-test run
+  # can easily exceed the 30 s default. 900 s covers ~5-15 min sweeps.
+  # wiki-bootstrap-import runs a full Hindsight ingest for ~3,000 records;
+  # the LLM-backed retain path makes it the longest-running Lambda in the
+  # set. 900 s is Lambda's per-invocation max and matches eval-runner's ceiling.
+  timeout     = each.key == "wakeup-processor" ? 300 : each.key == "chat-agent-invoke" ? 300 : each.key == "eval-runner" ? 900 : each.key == "wiki-compile" ? 480 : each.key == "wiki-lint" ? 300 : each.key == "wiki-export" ? 600 : each.key == "wiki-bootstrap-import" ? 900 : 30
+  memory_size = each.key == "graphql-http" ? 512 : each.key == "wakeup-processor" ? 512 : each.key == "eval-runner" ? 512 : each.key == "wiki-compile" ? 1024 : each.key == "wiki-export" ? 1024 : each.key == "wiki-bootstrap-import" ? 1024 : 256
   filename         = "${var.lambda_zips_dir}/${each.key}.zip"
   source_code_hash = filebase64sha256("${var.lambda_zips_dir}/${each.key}.zip")
@@ -222,10 +241,6 @@ locals {
     "ANY /api/webhooks/{proxy+}" = "webhooks-admin"
     "ANY /api/webhooks"          = "webhooks-admin"
-    # Task Connectors admin
-    "ANY /api/task-connectors/{proxy+}" = "task-connectors"
-    "ANY /api/task-connectors"          = "task-connectors"
     # Workspace files
     "ANY /api/workspaces/{proxy+}" = "workspace-files"
@@ -321,6 +336,94 @@ resource "aws_scheduler_schedule" "webhook_deliveries_cleanup" {
   }
 }
+# ---------------------------------------------------------------------------
+# Compounding Memory — nightly hygiene + export
+# ---------------------------------------------------------------------------
+# Daily trigger for the "wiki-lint" handler Lambda.
+# Created only when local.use_local_zips is true (see `count`); the target
+# indexes aws_lambda_function.handler by key, so that function must exist.
+# NOTE(review): presumably the handler for_each is gated on the same flag — confirm.
+resource "aws_scheduler_schedule" "wiki_lint" {
+  count = local.use_local_zips ? 1 : 0
+  name                = "thinkwork-${var.stage}-wiki-lint"
+  group_name          = "default"
+  schedule_expression = "cron(0 2 * * ? *)" # daily at 02:00 UTC
+  state               = "ENABLED"
+  # No flexible window: the schedule is not allowed to drift from 02:00.
+  flexible_time_window {
+    mode = "OFF"
+  }
+  # The scheduler assumes aws_iam_role.scheduler to invoke the Lambda.
+  target {
+    arn      = aws_lambda_function.handler["wiki-lint"].arn
+    role_arn = aws_iam_role.scheduler.arn
+  }
+}
+# Daily trigger for the "wiki-export" handler Lambda, one hour after the
+# wiki-lint schedule (02:00 UTC) so the export runs once lint has finished.
+# Created only when local.use_local_zips is true (see `count`).
+resource "aws_scheduler_schedule" "wiki_export" {
+  count = local.use_local_zips ? 1 : 0
+  name                = "thinkwork-${var.stage}-wiki-export"
+  group_name          = "default"
+  schedule_expression = "cron(0 3 * * ? *)" # daily at 03:00 UTC (after lint)
+  state               = "ENABLED"
+  # No flexible window: the schedule is not allowed to drift from 03:00.
+  flexible_time_window {
+    mode = "OFF"
+  }
+  # The scheduler assumes aws_iam_role.scheduler to invoke the Lambda.
+  target {
+    arn      = aws_lambda_function.handler["wiki-export"].arn
+    role_arn = aws_iam_role.scheduler.arn
+  }
+}
+# S3 bucket for markdown vault exports. One bundle per (tenant, owner, date).
+# Retention is handled by aws_s3_bucket_lifecycle_configuration.wiki_exports
+# (30 days). The bucket name is passed to the "wiki-export" handler through
+# the WIKI_EXPORT_BUCKET environment variable.
+# NOTE(review): the name is derived from the stage only; S3 bucket names are
+# globally unique, so two accounts using the same stage would collide — confirm.
+resource "aws_s3_bucket" "wiki_exports" {
+  bucket        = "thinkwork-${var.stage}-wiki-exports"
+  # Only the "dev" stage may destroy the bucket while it still holds objects.
+  force_destroy = var.stage == "dev"
+  tags = {
+    Name = "thinkwork-${var.stage}-wiki-exports"
+  }
+}
+# Turns on all four public-access-block settings for the wiki exports
+# bucket: public ACLs and public bucket policies are both rejected and ignored.
+resource "aws_s3_bucket_public_access_block" "wiki_exports" {
+  bucket                  = aws_s3_bucket.wiki_exports.id
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+# Retention for the wiki exports bucket. The empty `filter {}` applies the
+# rule to every object in the bucket.
+resource "aws_s3_bucket_lifecycle_configuration" "wiki_exports" {
+  bucket = aws_s3_bucket.wiki_exports.id
+  rule {
+    id     = "expire-old-bundles"
+    status = "Enabled"
+    filter {}
+    # Completed export bundles are deleted 30 days after creation.
+    expiration {
+      days = 30
+    }
+    # `expiration` never touches parts of multipart uploads that were started
+    # but not completed (e.g. an export Lambda that timed out mid-upload).
+    # Without this block those parts are stored, and billed, indefinitely.
+    abort_incomplete_multipart_upload {
+      days_after_initiation = 7
+    }
+  }
+}
+# Write-only access to the wiki exports bucket: objects can be uploaded
+# (including aborting a multipart upload) but not read, listed or deleted.
+# The policy is attached to aws_iam_role.lambda, so it applies to every
+# function that runs under that role, not only to the wiki-export handler.
+resource "aws_iam_role_policy" "lambda_wiki_exports_s3" {
+  name = "wiki-exports-s3"
+  role = aws_iam_role.lambda.id
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect = "Allow"
+      Action = ["s3:PutObject", "s3:AbortMultipartUpload"]
+      # Object-level ARN (`/*`): both actions operate on keys, not the bucket.
+      Resource = "${aws_s3_bucket.wiki_exports.arn}/*"
+    }]
+  })
+}
 resource "aws_iam_role" "scheduler" {
   name = "thinkwork-${var.stage}-scheduler-role"
@@ -358,6 +461,7 @@ resource "aws_ssm_parameter" "lambda_arns" {
     "kb-manager-fn-arn"           = aws_lambda_function.handler["knowledge-base-manager"].arn
     "job-schedule-manager-fn-arn" = aws_lambda_function.handler["job-schedule-manager"].arn
     "memory-retain-fn-arn"        = aws_lambda_function.handler["memory-retain"].arn
+    "eval-runner-fn-arn"          = aws_lambda_function.handler["eval-runner"].arn
   } : {}
   name  = "/thinkwork/${var.stage}/${each.key}"

package/dist/terraform/modules/app/lambda-api/main.tf CHANGED Viewed

@@ -196,12 +196,21 @@ resource "aws_iam_role_policy" "lambda_bedrock" {
   name = "bedrock-invoke"
   role = aws_iam_role.lambda.id
+  # Cross-region inference profiles (us.anthropic.claude-*) require
+  # `bedrock:InvokeModel` on the *inference-profile* ARN AND on the
+  # underlying foundation-model ARN in *every* region the profile can
+  # route to (e.g. us-east-2 for us.anthropic.claude-haiku-4-5). The
+  # region wildcard below covers all of them. Needed by the eval-runner
+  # llm-rubric judge and any handler that calls Converse with a profile ID.
   policy = jsonencode({
     Version = "2012-10-17"
     Statement = [{
-      Effect   = "Allow"
-      Action   = ["bedrock:InvokeModel", "bedrock:InvokeModelWithResponseStream"]
-      Resource = "arn:aws:bedrock:${var.region}::foundation-model/*"
+      Effect = "Allow"
+      Action = ["bedrock:InvokeModel", "bedrock:InvokeModelWithResponseStream"]
+      Resource = [
+        "arn:aws:bedrock:*::foundation-model/*",
+        "arn:aws:bedrock:*:${var.account_id}:inference-profile/*",
+      ]
     }]
   })
 }
@@ -252,6 +261,62 @@ resource "aws_iam_role_policy" "lambda_agentcore_invoke" {
   })
 }
+# Eval-runner: invoke the AgentCore Runtime data plane to run an agent
+# under test, and call AgentCore Evaluations.Evaluate to score the
+# resulting spans. Both APIs are on the bedrock-agentcore service. Also
+# allow reading spans + log events from CloudWatch Logs (aws/spans is
+# the Transaction Search destination; the runtime log groups carry the
+# OTel scope=strands.telemetry.tracer log records that EvaluateCommand
+# requires alongside the spans).
+#
+# The policy is attached to aws_iam_role.lambda, so these permissions apply
+# to every function that runs under that role, not only to eval-runner.
+resource "aws_iam_role_policy" "lambda_eval_runner" {
+  name = "eval-runner-bedrock-agentcore"
+  role = aws_iam_role.lambda.id
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        # Invoke any AgentCore runtime in this account and region.
+        Sid      = "AgentCoreInvokeRuntime"
+        Effect   = "Allow"
+        Action   = ["bedrock-agentcore:InvokeAgentRuntime"]
+        Resource = "arn:aws:bedrock-agentcore:${var.region}:${var.account_id}:runtime/*"
+      },
+      {
+        # Score runs and look up evaluators.
+        # NOTE(review): "*" presumably because these actions cannot be
+        # scoped to a resource ARN — confirm against the service reference.
+        Sid    = "AgentCoreEvaluate"
+        Effect = "Allow"
+        Action = [
+          "bedrock-agentcore:Evaluate",
+          "bedrock-agentcore:GetEvaluator",
+          "bedrock-agentcore:ListEvaluators",
+        ]
+        Resource = "*"
+      },
+      {
+        # Read-only CloudWatch Logs access to aws/spans and the AgentCore
+        # runtime log groups. Each log group is listed twice: the bare ARN
+        # and the same ARN with a `:*` suffix.
+        Sid    = "EvalSpansRead"
+        Effect = "Allow"
+        Action = [
+          "logs:FilterLogEvents",
+          "logs:GetLogEvents",
+          "logs:DescribeLogGroups",
+          "logs:DescribeLogStreams",
+        ]
+        Resource = [
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:aws/spans",
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:aws/spans:*",
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/bedrock-agentcore/runtimes/*",
+          "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/bedrock-agentcore/runtimes/*:*",
+        ]
+      },
+      {
+        # Read SSM parameters under /thinkwork/<stage>/agentcore/ whose name
+        # starts with "runtime-id-".
+        Sid      = "SsmReadEvalRunnerCfg"
+        Effect   = "Allow"
+        Action   = ["ssm:GetParameter", "ssm:GetParameters"]
+        Resource = "arn:aws:ssm:${var.region}:${var.account_id}:parameter/thinkwork/${var.stage}/agentcore/runtime-id-*"
+      },
+    ]
+  })
+}
 # AgentCore Memory read access for the GraphQL memory resolvers.
 # memoryRecords / memorySearch call ListMemoryRecordsCommand to fetch
 # records across the tenant's agents.
@@ -362,6 +427,16 @@ resource "aws_iam_role_policy" "lambda_api_cross_invoke" {
         "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-chat-agent-invoke",
         "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-knowledge-base-manager",
         "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-job-schedule-manager",
+        # eval-runner: graphql-http's startEvalRun mutation Event-invokes
+        # this asynchronously after inserting the eval_runs row.
+        "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-eval-runner",
+        # wiki-compile: memory-retain Event-invokes this after a successful
+        # retainTurn when the tenant's wiki_compile_enabled flag is on.
+        # compileWikiNow admin mutation also Event-invokes.
+        "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-wiki-compile",
+        # wiki-bootstrap-import: bootstrapJournalImport admin mutation
+        # Event-invokes this for the long-running ingest path.
+        "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-wiki-bootstrap-import",
       ]
     }]
   })

package/dist/terraform/modules/foundation/cognito/variables.tf CHANGED Viewed

@@ -107,20 +107,23 @@ variable "admin_logout_urls" {
 }
 variable "mobile_callback_urls" {
-  description = "OAuth callback URLs for the mobile client"
+  description = "OAuth callback URLs for the mobile client. Includes LastMile's `myapp://` scheme (host apps that embed the SDK register their own deep-link here). Proper per-host app client isolation is 0.3.0 work — this is the stopgap capture of the drift from the CLI-applied URIs."
   type        = list(string)
   default = [
     "exp://localhost:8081",
     "thinkwork://",
     "thinkwork://auth/callback",
+    "myapp://",
+    "myapp://oauth/callback",
   ]
 }
 variable "mobile_logout_urls" {
-  description = "OAuth logout URLs for the mobile client"
+  description = "OAuth logout URLs for the mobile client. Includes LastMile's `myapp://` scheme (see `mobile_callback_urls` for rationale)."
   type        = list(string)
   default = [
     "exp://localhost:8081",
     "thinkwork://",
+    "myapp://",
   ]
 }

package/dist/terraform/modules/thinkwork/variables.tf CHANGED Viewed

@@ -217,13 +217,21 @@ variable "api_auth_secret" {
 # ---------------------------------------------------------------------------
 variable "admin_callback_urls" {
-  type    = list(string)
-  default = ["http://localhost:5174", "http://localhost:5174/auth/callback"]
+  type = list(string)
+  default = [
+    "http://localhost:5174",
+    "http://localhost:5174/auth/callback",
+    "http://localhost:5175",
+    "http://localhost:5175/auth/callback",
+  ]
 }
 variable "admin_logout_urls" {
-  type    = list(string)
-  default = ["http://localhost:5174"]
+  type = list(string)
+  default = [
+    "http://localhost:5174",
+    "http://localhost:5175",
+  ]
 }
 variable "mobile_callback_urls" {

package/dist/terraform/schema.graphql CHANGED Viewed

@@ -96,6 +96,19 @@ type CostRecordedEvent {
   updatedAt: AWSDateTime!
 }
+# Progress/result event for an eval run. Returned by the notifyEvalRunUpdate
+# mutation and delivered to onEvalRunUpdated subscribers.
+# Only runId, tenantId, status and updatedAt are non-null; the counters and
+# errorMessage may be absent.
+# NOTE(review): notifyEvalRunUpdate takes no updatedAt argument, so the
+# resolver presumably has to supply this non-null field — confirm.
+type EvalRunUpdateEvent {
+  runId: ID!
+  tenantId: ID!
+  agentId: ID
+  # Free-form string rather than an enum; allowed values are not declared here.
+  status: String!
+  totalTests: Int
+  passed: Int
+  failed: Int
+  passRate: Float
+  errorMessage: String
+  updatedAt: AWSDateTime!
+}
 # ────────────────────────────────────────────────────────────────────
 # Notification mutations
 #
@@ -169,6 +182,18 @@ type Mutation {
     amountUsd: Float!
     model: String
   ): CostRecordedEvent @aws_api_key @aws_cognito_user_pools @aws_iam
+  notifyEvalRunUpdate(
+    runId: ID!
+    tenantId: ID!
+    agentId: ID
+    status: String!
+    totalTests: Int
+    passed: Int
+    failed: Int
+    passRate: Float
+    errorMessage: String
+  ): EvalRunUpdateEvent @aws_api_key @aws_cognito_user_pools @aws_iam
 }
 type Subscription {
@@ -196,4 +221,7 @@ type Subscription {
   onCostRecorded(tenantId: ID!): CostRecordedEvent @aws_api_key @aws_cognito_user_pools @aws_iam
     @aws_subscribe(mutations: ["notifyCostRecorded"])
+  onEvalRunUpdated(tenantId: ID!): EvalRunUpdateEvent @aws_api_key @aws_cognito_user_pools @aws_iam
+    @aws_subscribe(mutations: ["notifyEvalRunUpdate"])
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "thinkwork-cli",
-  "version": "0.8.2",
+  "version": "0.9.0",
   "description": "Thinkwork CLI — deploy, manage, and interact with your Thinkwork stack",
   "license": "MIT",
   "type": "module",