thinkwork-cli 0.8.2 → 0.9.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -154,10 +154,45 @@ resource "aws_iam_role_policy" "agentcore" {
154
154
  Resource = "*"
155
155
  },
156
156
  {
157
- Sid = "CloudWatchLogs"
158
- Effect = "Allow"
159
- Action = ["logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents"]
160
- Resource = "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/lambda/thinkwork-${var.stage}-*"
157
+ Sid = "CloudWatchLogs"
158
+ Effect = "Allow"
159
+ Action = [
160
+ "logs:CreateLogGroup",
161
+ "logs:CreateLogStream",
162
+ "logs:DescribeLogGroups",
163
+ "logs:DescribeLogStreams",
164
+ "logs:PutLogEvents",
165
+ ]
166
+ # Lambda log group + AgentCore Runtime container log groups + the
167
+ # account-wide aws/spans log group (CloudWatch Transaction Search
168
+ # destination — required for AgentCore Evaluations to read spans).
169
+ # Each entry is doubled with `:*` so log-STREAM operations are
170
+ # allowed (log-group ARN without `:*` covers group-level ops only).
171
+ Resource = [
172
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/lambda/thinkwork-${var.stage}-*",
173
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/lambda/thinkwork-${var.stage}-*:*",
174
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/bedrock-agentcore/runtimes/*",
175
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/bedrock-agentcore/runtimes/*:*",
176
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:aws/spans",
177
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:aws/spans:*",
178
+ ]
179
+ },
180
+ {
181
+ # X-Ray ingestion — ADOT exporters publish spans here, which then
182
+ # flow to aws/spans via the Transaction Search policy. AgentCore
183
+ # Evaluations queries those spans by session.id when scoring runs.
184
+ Sid = "XRayIngest"
185
+ Effect = "Allow"
186
+ Action = [
187
+ "xray:PutTraceSegments",
188
+ "xray:PutTelemetryRecords",
189
+ "xray:GetSamplingRules",
190
+ "xray:GetSamplingTargets",
191
+ ]
192
+ Resource = [
193
+ "arn:aws:xray:${var.region}:${var.account_id}:*",
194
+ "*",
195
+ ]
161
196
  },
162
197
  {
163
198
  Sid = "ECRPull"
@@ -51,7 +51,7 @@ locals {
51
51
  # path. When unset, syncExternalTaskOnCreate writes sync_status='local'
52
52
  # and the workflow picker proxy returns 503. Set to the LMI develop /
53
53
  # staging / prod base URL per stage to enable real cross-system sync.
54
- LASTMILE_TASKS_API_URL = var.lastmile_tasks_api_url
54
+ LASTMILE_TASKS_API_URL = var.lastmile_tasks_api_url
55
55
  }
56
56
 
57
57
  # Per-handler env-var overrides. ARNs are constructed from the naming
@@ -62,6 +62,15 @@ locals {
62
62
  JOB_TRIGGER_ARN = "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-job-trigger"
63
63
  JOB_TRIGGER_ROLE_ARN = var.job_scheduler_role_arn
64
64
  }
65
+ # Compounding Memory compile Lambda. Claude Haiku 4.5 via Bedrock; the
66
+ # planner + section-writer cap themselves at ~500 records / 25 new pages
67
+ # per invocation, so a 480 s timeout covers the worst case comfortably.
68
+ "wiki-compile" = {
69
+ BEDROCK_MODEL_ID = "us.anthropic.claude-haiku-4-5-20251001-v1:0"
70
+ }
71
+ "wiki-export" = {
72
+ WIKI_EXPORT_BUCKET = aws_s3_bucket.wiki_exports.bucket
73
+ }
65
74
  }
66
75
  }
67
76
 
@@ -96,7 +105,6 @@ resource "aws_lambda_function" "handler" {
96
105
  "webhooks",
97
106
  "webhooks-admin",
98
107
  "webhook-deliveries-cleanup",
99
- "task-connectors",
100
108
  "workspace-files",
101
109
  "knowledge-base-manager",
102
110
  "knowledge-base-files",
@@ -106,19 +114,30 @@ resource "aws_lambda_function" "handler" {
106
114
  "github-repos",
107
115
  "memory",
108
116
  "memory-retain",
117
+ "wiki-compile",
118
+ "wiki-lint",
119
+ "wiki-export",
120
+ "wiki-bootstrap-import",
109
121
  "artifact-deliver",
110
122
  "recipe-refresh",
111
123
  "agent-skills-list",
112
124
  "bootstrap-workspaces",
113
125
  "code-factory",
126
+ "eval-runner",
114
127
  ]) : toset([])
115
128
 
116
129
  function_name = "thinkwork-${var.stage}-api-${each.key}"
117
130
  role = aws_iam_role.lambda.arn
118
131
  handler = "index.handler"
119
132
  runtime = local.runtime
120
- timeout = each.key == "wakeup-processor" ? 300 : each.key == "chat-agent-invoke" ? 300 : 30
121
- memory_size = each.key == "graphql-http" ? 512 : each.key == "wakeup-processor" ? 512 : 256
133
+ # eval-runner walks every test case sequentially, invoking an agent +
134
+ # waiting up to 2 min for spans to propagate per test, so a 10-test run
135
+ # can easily exceed the 30 s default. 900 s covers ~5-15 min sweeps.
136
+ # wiki-bootstrap-import runs a full Hindsight ingest for ~3,000 records;
137
+ # the LLM-backed retain path makes it the longest-running Lambda in the
138
+ # set. 900 s is Lambda's per-invocation max and matches eval-runner's ceiling.
139
+ timeout = each.key == "wakeup-processor" ? 300 : each.key == "chat-agent-invoke" ? 300 : each.key == "eval-runner" ? 900 : each.key == "wiki-compile" ? 480 : each.key == "wiki-lint" ? 300 : each.key == "wiki-export" ? 600 : each.key == "wiki-bootstrap-import" ? 900 : 30
140
+ memory_size = each.key == "graphql-http" ? 512 : each.key == "wakeup-processor" ? 512 : each.key == "eval-runner" ? 512 : each.key == "wiki-compile" ? 1024 : each.key == "wiki-export" ? 1024 : each.key == "wiki-bootstrap-import" ? 1024 : 256
122
141
 
123
142
  filename = "${var.lambda_zips_dir}/${each.key}.zip"
124
143
  source_code_hash = filebase64sha256("${var.lambda_zips_dir}/${each.key}.zip")
@@ -222,10 +241,6 @@ locals {
222
241
  "ANY /api/webhooks/{proxy+}" = "webhooks-admin"
223
242
  "ANY /api/webhooks" = "webhooks-admin"
224
243
 
225
- # Task Connectors admin
226
- "ANY /api/task-connectors/{proxy+}" = "task-connectors"
227
- "ANY /api/task-connectors" = "task-connectors"
228
-
229
244
  # Workspace files
230
245
  "ANY /api/workspaces/{proxy+}" = "workspace-files"
231
246
 
@@ -321,6 +336,94 @@ resource "aws_scheduler_schedule" "webhook_deliveries_cleanup" {
321
336
  }
322
337
  }
323
338
 
339
+ # ---------------------------------------------------------------------------
340
+ # Compounding Memory — nightly hygiene + export
341
+ # ---------------------------------------------------------------------------
342
+
343
+ resource "aws_scheduler_schedule" "wiki_lint" {
344
+ count = local.use_local_zips ? 1 : 0
345
+
346
+ name = "thinkwork-${var.stage}-wiki-lint"
347
+ group_name = "default"
348
+ schedule_expression = "cron(0 2 * * ? *)" # daily at 02:00 UTC
349
+ state = "ENABLED"
350
+
351
+ flexible_time_window {
352
+ mode = "OFF"
353
+ }
354
+
355
+ target {
356
+ arn = aws_lambda_function.handler["wiki-lint"].arn
357
+ role_arn = aws_iam_role.scheduler.arn
358
+ }
359
+ }
360
+
361
+ resource "aws_scheduler_schedule" "wiki_export" {
362
+ count = local.use_local_zips ? 1 : 0
363
+
364
+ name = "thinkwork-${var.stage}-wiki-export"
365
+ group_name = "default"
366
+ schedule_expression = "cron(0 3 * * ? *)" # daily at 03:00 UTC (after lint)
367
+ state = "ENABLED"
368
+
369
+ flexible_time_window {
370
+ mode = "OFF"
371
+ }
372
+
373
+ target {
374
+ arn = aws_lambda_function.handler["wiki-export"].arn
375
+ role_arn = aws_iam_role.scheduler.arn
376
+ }
377
+ }
378
+
379
+ # S3 bucket for markdown vault exports. One bundle per (tenant, owner, date).
380
+ # Retention is handled by the lifecycle rule below (30 days).
381
+ resource "aws_s3_bucket" "wiki_exports" {
382
+ bucket = "thinkwork-${var.stage}-wiki-exports"
383
+ force_destroy = var.stage == "dev"
384
+
385
+ tags = {
386
+ Name = "thinkwork-${var.stage}-wiki-exports"
387
+ }
388
+ }
389
+
390
+ resource "aws_s3_bucket_public_access_block" "wiki_exports" {
391
+ bucket = aws_s3_bucket.wiki_exports.id
392
+ block_public_acls = true
393
+ block_public_policy = true
394
+ ignore_public_acls = true
395
+ restrict_public_buckets = true
396
+ }
397
+
398
+ resource "aws_s3_bucket_lifecycle_configuration" "wiki_exports" {
399
+ bucket = aws_s3_bucket.wiki_exports.id
400
+
401
+ rule {
402
+ id = "expire-old-bundles"
403
+ status = "Enabled"
404
+
405
+ filter {}
406
+
407
+ expiration {
408
+ days = 30
409
+ }
410
+ }
411
+ }
412
+
413
+ resource "aws_iam_role_policy" "lambda_wiki_exports_s3" {
414
+ name = "wiki-exports-s3"
415
+ role = aws_iam_role.lambda.id
416
+
417
+ policy = jsonencode({
418
+ Version = "2012-10-17"
419
+ Statement = [{
420
+ Effect = "Allow"
421
+ Action = ["s3:PutObject", "s3:AbortMultipartUpload"]
422
+ Resource = "${aws_s3_bucket.wiki_exports.arn}/*"
423
+ }]
424
+ })
425
+ }
426
+
324
427
  resource "aws_iam_role" "scheduler" {
325
428
  name = "thinkwork-${var.stage}-scheduler-role"
326
429
 
@@ -358,6 +461,7 @@ resource "aws_ssm_parameter" "lambda_arns" {
358
461
  "kb-manager-fn-arn" = aws_lambda_function.handler["knowledge-base-manager"].arn
359
462
  "job-schedule-manager-fn-arn" = aws_lambda_function.handler["job-schedule-manager"].arn
360
463
  "memory-retain-fn-arn" = aws_lambda_function.handler["memory-retain"].arn
464
+ "eval-runner-fn-arn" = aws_lambda_function.handler["eval-runner"].arn
361
465
  } : {}
362
466
 
363
467
  name = "/thinkwork/${var.stage}/${each.key}"
@@ -196,12 +196,21 @@ resource "aws_iam_role_policy" "lambda_bedrock" {
196
196
  name = "bedrock-invoke"
197
197
  role = aws_iam_role.lambda.id
198
198
 
199
+ # Cross-region inference profiles (us.anthropic.claude-*) require
200
+ # `bedrock:InvokeModel` on the *inference-profile* ARN AND on the
201
+ # underlying foundation-model ARN in *every* region the profile can
202
+ # route to (e.g. us-east-2 for us.anthropic.claude-haiku-4-5). The
203
+ # region wildcard below covers all of them. Needed by the eval-runner
204
+ # llm-rubric judge and any handler that calls Converse with a profile ID.
199
205
  policy = jsonencode({
200
206
  Version = "2012-10-17"
201
207
  Statement = [{
202
- Effect = "Allow"
203
- Action = ["bedrock:InvokeModel", "bedrock:InvokeModelWithResponseStream"]
204
- Resource = "arn:aws:bedrock:${var.region}::foundation-model/*"
208
+ Effect = "Allow"
209
+ Action = ["bedrock:InvokeModel", "bedrock:InvokeModelWithResponseStream"]
210
+ Resource = [
211
+ "arn:aws:bedrock:*::foundation-model/*",
212
+ "arn:aws:bedrock:*:${var.account_id}:inference-profile/*",
213
+ ]
205
214
  }]
206
215
  })
207
216
  }
@@ -252,6 +261,62 @@ resource "aws_iam_role_policy" "lambda_agentcore_invoke" {
252
261
  })
253
262
  }
254
263
 
264
+ # Eval-runner: invoke the AgentCore Runtime data plane to run an agent
265
+ # under test, and call AgentCore Evaluations.Evaluate to score the
266
+ # resulting spans. Both APIs are on the bedrock-agentcore service. Also
267
+ # allow reading spans + log events from CloudWatch Logs (aws/spans is
268
+ # the Transaction Search destination; the runtime log groups carry the
269
+ # OTel scope=strands.telemetry.tracer log records that EvaluateCommand
270
+ # requires alongside the spans).
271
+ resource "aws_iam_role_policy" "lambda_eval_runner" {
272
+ name = "eval-runner-bedrock-agentcore"
273
+ role = aws_iam_role.lambda.id
274
+
275
+ policy = jsonencode({
276
+ Version = "2012-10-17"
277
+ Statement = [
278
+ {
279
+ Sid = "AgentCoreInvokeRuntime"
280
+ Effect = "Allow"
281
+ Action = ["bedrock-agentcore:InvokeAgentRuntime"]
282
+ Resource = "arn:aws:bedrock-agentcore:${var.region}:${var.account_id}:runtime/*"
283
+ },
284
+ {
285
+ Sid = "AgentCoreEvaluate"
286
+ Effect = "Allow"
287
+ Action = [
288
+ "bedrock-agentcore:Evaluate",
289
+ "bedrock-agentcore:GetEvaluator",
290
+ "bedrock-agentcore:ListEvaluators",
291
+ ]
292
+ Resource = "*"
293
+ },
294
+ {
295
+ Sid = "EvalSpansRead"
296
+ Effect = "Allow"
297
+ Action = [
298
+ "logs:FilterLogEvents",
299
+ "logs:GetLogEvents",
300
+ "logs:DescribeLogGroups",
301
+ "logs:DescribeLogStreams",
302
+ ]
303
+ Resource = [
304
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:aws/spans",
305
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:aws/spans:*",
306
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/bedrock-agentcore/runtimes/*",
307
+ "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws/bedrock-agentcore/runtimes/*:*",
308
+ ]
309
+ },
310
+ {
311
+ Sid = "SsmReadEvalRunnerCfg"
312
+ Effect = "Allow"
313
+ Action = ["ssm:GetParameter", "ssm:GetParameters"]
314
+ Resource = "arn:aws:ssm:${var.region}:${var.account_id}:parameter/thinkwork/${var.stage}/agentcore/runtime-id-*"
315
+ },
316
+ ]
317
+ })
318
+ }
319
+
255
320
  # AgentCore Memory read access for the GraphQL memory resolvers.
256
321
  # memoryRecords / memorySearch call ListMemoryRecordsCommand to fetch
257
322
  # records across the tenant's agents.
@@ -362,6 +427,16 @@ resource "aws_iam_role_policy" "lambda_api_cross_invoke" {
362
427
  "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-chat-agent-invoke",
363
428
  "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-knowledge-base-manager",
364
429
  "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-job-schedule-manager",
430
+ # eval-runner: graphql-http's startEvalRun mutation Event-invokes
431
+ # this asynchronously after inserting the eval_runs row.
432
+ "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-eval-runner",
433
+ # wiki-compile: memory-retain Event-invokes this after a successful
434
+ # retainTurn when the tenant's wiki_compile_enabled flag is on.
435
+ # compileWikiNow admin mutation also Event-invokes.
436
+ "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-wiki-compile",
437
+ # wiki-bootstrap-import: bootstrapJournalImport admin mutation
438
+ # Event-invokes this for the long-running ingest path.
439
+ "arn:aws:lambda:${var.region}:${var.account_id}:function:thinkwork-${var.stage}-api-wiki-bootstrap-import",
365
440
  ]
366
441
  }]
367
442
  })
@@ -107,20 +107,23 @@ variable "admin_logout_urls" {
107
107
  }
108
108
 
109
109
  variable "mobile_callback_urls" {
110
- description = "OAuth callback URLs for the mobile client"
110
+ description = "OAuth callback URLs for the mobile client. Includes LastMile's `myapp://` scheme (host apps that embed the SDK register their own deep-link here). Proper per-host app client isolation is 0.3.0 work — this is the stopgap capture of the drift from the CLI-applied URIs."
111
111
  type = list(string)
112
112
  default = [
113
113
  "exp://localhost:8081",
114
114
  "thinkwork://",
115
115
  "thinkwork://auth/callback",
116
+ "myapp://",
117
+ "myapp://oauth/callback",
116
118
  ]
117
119
  }
118
120
 
119
121
  variable "mobile_logout_urls" {
120
- description = "OAuth logout URLs for the mobile client"
122
+ description = "OAuth logout URLs for the mobile client. Includes LastMile's `myapp://` scheme (see `mobile_callback_urls` for rationale)."
121
123
  type = list(string)
122
124
  default = [
123
125
  "exp://localhost:8081",
124
126
  "thinkwork://",
127
+ "myapp://",
125
128
  ]
126
129
  }
@@ -217,13 +217,21 @@ variable "api_auth_secret" {
217
217
  # ---------------------------------------------------------------------------
218
218
 
219
219
  variable "admin_callback_urls" {
220
- type = list(string)
221
- default = ["http://localhost:5174", "http://localhost:5174/auth/callback"]
220
+ type = list(string)
221
+ default = [
222
+ "http://localhost:5174",
223
+ "http://localhost:5174/auth/callback",
224
+ "http://localhost:5175",
225
+ "http://localhost:5175/auth/callback",
226
+ ]
222
227
  }
223
228
 
224
229
  variable "admin_logout_urls" {
225
- type = list(string)
226
- default = ["http://localhost:5174"]
230
+ type = list(string)
231
+ default = [
232
+ "http://localhost:5174",
233
+ "http://localhost:5175",
234
+ ]
227
235
  }
228
236
 
229
237
  variable "mobile_callback_urls" {
@@ -96,6 +96,19 @@ type CostRecordedEvent {
96
96
  updatedAt: AWSDateTime!
97
97
  }
98
98
 
99
+ type EvalRunUpdateEvent {
100
+ runId: ID!
101
+ tenantId: ID!
102
+ agentId: ID
103
+ status: String!
104
+ totalTests: Int
105
+ passed: Int
106
+ failed: Int
107
+ passRate: Float
108
+ errorMessage: String
109
+ updatedAt: AWSDateTime!
110
+ }
111
+
99
112
  # ────────────────────────────────────────────────────────────────────
100
113
  # Notification mutations
101
114
  #
@@ -169,6 +182,18 @@ type Mutation {
169
182
  amountUsd: Float!
170
183
  model: String
171
184
  ): CostRecordedEvent @aws_api_key @aws_cognito_user_pools @aws_iam
185
+
186
+ notifyEvalRunUpdate(
187
+ runId: ID!
188
+ tenantId: ID!
189
+ agentId: ID
190
+ status: String!
191
+ totalTests: Int
192
+ passed: Int
193
+ failed: Int
194
+ passRate: Float
195
+ errorMessage: String
196
+ ): EvalRunUpdateEvent @aws_api_key @aws_cognito_user_pools @aws_iam
172
197
  }
173
198
 
174
199
  type Subscription {
@@ -196,4 +221,7 @@ type Subscription {
196
221
 
197
222
  onCostRecorded(tenantId: ID!): CostRecordedEvent @aws_api_key @aws_cognito_user_pools @aws_iam
198
223
  @aws_subscribe(mutations: ["notifyCostRecorded"])
224
+
225
+ onEvalRunUpdated(tenantId: ID!): EvalRunUpdateEvent @aws_api_key @aws_cognito_user_pools @aws_iam
226
+ @aws_subscribe(mutations: ["notifyEvalRunUpdate"])
199
227
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thinkwork-cli",
3
- "version": "0.8.2",
3
+ "version": "0.9.0",
4
4
  "description": "Thinkwork CLI — deploy, manage, and interact with your Thinkwork stack",
5
5
  "license": "MIT",
6
6
  "type": "module",