@keystrokehq/skills 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/AGENTS-blurb.md +123 -0
  2. package/LICENSE +21 -0
  3. package/README.md +63 -0
  4. package/keystroke-agent-authoring/SKILL.md +225 -0
  5. package/keystroke-agent-authoring/evals/evals.json +29 -0
  6. package/keystroke-agent-authoring/references/messaging-gateways.md +242 -0
  7. package/keystroke-agent-authoring/references/patterns.md +417 -0
  8. package/keystroke-agent-authoring/references/prebuilt-integrations.md +879 -0
  9. package/keystroke-agent-authoring/references/sandbox-and-mcp.md +214 -0
  10. package/keystroke-agent-authoring/references/source-map.md +182 -0
  11. package/keystroke-agent-authoring/references/testing.md +85 -0
  12. package/keystroke-cli-workspace/SKILL.md +93 -0
  13. package/keystroke-cli-workspace/evals/evals.json +23 -0
  14. package/keystroke-cli-workspace/references/command-map.md +50 -0
  15. package/keystroke-cli-workspace/references/credentials-and-connect.md +79 -0
  16. package/keystroke-cli-workspace/references/project-lifecycle.md +85 -0
  17. package/keystroke-credential-binding/SKILL.md +509 -0
  18. package/keystroke-credential-binding/evals/evals.json +29 -0
  19. package/keystroke-credential-binding/references/cli.md +85 -0
  20. package/keystroke-credential-binding/references/patterns.md +878 -0
  21. package/keystroke-credential-binding/references/source-map.md +69 -0
  22. package/keystroke-data-toolkit/SKILL.md +59 -0
  23. package/keystroke-data-toolkit/evals/evals.json +23 -0
  24. package/keystroke-data-toolkit/references/usage.md +79 -0
  25. package/keystroke-task-authoring/SKILL.md +124 -0
  26. package/keystroke-task-authoring/evals/evals.json +23 -0
  27. package/keystroke-task-authoring/references/patterns.md +132 -0
  28. package/keystroke-task-authoring/references/source-map.md +61 -0
  29. package/keystroke-trigger-authoring/SKILL.md +189 -0
  30. package/keystroke-trigger-authoring/evals/evals.json +29 -0
  31. package/keystroke-trigger-authoring/references/patterns.md +265 -0
  32. package/keystroke-trigger-authoring/references/source-map.md +128 -0
  33. package/keystroke-trigger-authoring/references/testing.md +148 -0
  34. package/keystroke-workflow-as-tool-debugging/SKILL.md +52 -0
  35. package/keystroke-workflow-as-tool-debugging/evals/evals.json +23 -0
  36. package/keystroke-workflow-as-tool-debugging/references/playbook.md +77 -0
  37. package/keystroke-workflow-authoring/SKILL.md +234 -0
  38. package/keystroke-workflow-authoring/evals/evals.json +29 -0
  39. package/keystroke-workflow-authoring/references/patterns.md +265 -0
  40. package/keystroke-workflow-authoring/references/prebuilt-integrations.md +811 -0
  41. package/keystroke-workflow-authoring/references/runtime-helpers.md +264 -0
  42. package/keystroke-workflow-authoring/references/source-map.md +108 -0
  43. package/keystroke-workflow-authoring/references/testing.md +108 -0
  44. package/package.json +26 -0
@@ -0,0 +1,148 @@
1
+ # Trigger Testing
2
+
3
+ Read this file when the user asks how to test a Keystroke trigger.
4
+
5
+ Assume `paymentWebhook`, `orderPolling`, and `paymentWorkflow` are the public trigger and workflow instances shown elsewhere in this skill.
6
+
7
+ ## Vitest setup
8
+
9
+ `keystrokeTestPlugin()` adds the core test setup file to Vitest. That setup file is required for credential resolution and context mocking.
10
+
11
+ ```ts
12
+ import { defineConfig } from 'vitest/config';
13
+ import { keystrokeTestPlugin } from '@keystrokehq/core/vitest';
14
+
15
+ export default defineConfig({
16
+ plugins: [keystrokeTestPlugin()],
17
+ });
18
+ ```
19
+
20
+ ## Test webhook verification and filtering
21
+
22
+ ```ts
23
+ const request = {
24
+ headers: {
25
+ 'x-signature': 'signed',
26
+ },
27
+ query: {},
28
+ rawBody: JSON.stringify({
29
+ id: 'evt_123',
30
+ type: 'payment.completed',
31
+ amount: 5000,
32
+ }),
33
+ method: 'POST',
34
+ path: '/payments',
35
+ };
36
+
37
+ await paymentWebhook.verify?.(request, {
38
+ credentials: {},
39
+ triggerName: paymentWebhook.name,
40
+ triggerType: 'webhook',
41
+ });
42
+
43
+ const payload = paymentWebhook.payload.parse(JSON.parse(request.rawBody));
44
+ const shouldRun = paymentWebhook.filter?.(payload, request);
45
+ ```
46
+
47
+ This isolates the webhook-only concerns:
48
+
49
+ - authenticity checks in `verify`
50
+ - parsing via `trigger.payload.parse(JSON.parse(request.rawBody))`
51
+ - event gating in `filter`
52
+
53
+ ## Test polling behavior
54
+
55
+ ```ts
56
+ const response = await orderPolling.poll({
57
+ credentials: {},
58
+ triggerName: orderPolling.name,
59
+ triggerType: 'polling',
60
+ lastPolledAt: new Date().toISOString(),
61
+ lastResponse: {
62
+ orderId: 'order_123',
63
+ status: 'created',
64
+ },
65
+ });
66
+
67
+ const payload = orderPolling.parseResponse(response);
68
+ ```
69
+
70
+ This isolates the polling-only concerns:
71
+
72
+ - what `poll(...)` returns
73
+ - how prior state affects the next poll
74
+ - whether the response matches the trigger schema
75
+
76
+ ## Test bound trigger transform
77
+
78
+ Create a bound trigger by calling the trigger as a function with `{ transform }`, then call `bound.transform?.(payload)` to test the mapping:
79
+
80
+ ```ts
81
+ const bound = paymentWebhook({
82
+ transform: (payload) => ({
83
+ eventId: payload.id,
84
+ amount: payload.amount,
85
+ }),
86
+ });
87
+
88
+ const workflowInput = bound.transform?.(
89
+ { id: 'evt_123', type: 'payment.completed', amount: 5000 },
90
+ );
91
+
92
+ expect(workflowInput).toEqual({ eventId: 'evt_123', amount: 5000 });
93
+ ```
94
+
95
+ This tests only the mapping from trigger payload to workflow input.
96
+
97
+ ## Test bound trigger transform with request context
98
+
99
+ ```ts
100
+ const bound = paymentWebhook({
101
+ transform: (payload, request) => ({
102
+ eventId: payload.id,
103
+ amount: payload.amount,
104
+ }),
105
+ });
106
+
107
+ const workflowInput = bound.transform?.(
108
+ { id: 'evt_123', type: 'payment.completed', amount: 5000 },
109
+ request,
110
+ );
111
+ ```
112
+
113
+ Pass the optional second argument when the transform depends on webhook request data (headers, query params, etc.). Note that only `verify` receives credentials — `transform`, `filter`, and `idempotencyKey` do not.
114
+
115
+ ## Full webhook-to-workflow test
116
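+ `mockRequest` is not defined in this file; treat it as a local test helper that builds a request object shaped like the one in the webhook verification example above.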
117
+ ```ts
118
+ it('maps a valid webhook event into workflow input', async () => {
119
+ const request = mockRequest({
120
+ headers: { 'x-signature': 'signed' },
121
+ rawBody: JSON.stringify({ id: 'evt_123', type: 'payment.completed', amount: 5000 }),
122
+ });
123
+
124
+ await paymentWebhook.verify?.(request, {
125
+ credentials: {},
126
+ triggerName: paymentWebhook.name,
127
+ triggerType: 'webhook',
128
+ });
129
+
130
+ const payload = paymentWebhook.payload.parse(JSON.parse(request.rawBody));
131
+ const passed = paymentWebhook.filter?.(payload, request);
132
+ expect(passed).toBe(true);
133
+
134
+ const bound = paymentWebhook({
135
+ transform: (p) => ({ eventId: p.id, amount: p.amount }),
136
+ });
137
+ const workflowInput = bound.transform?.(payload);
138
+ expect(workflowInput).toEqual({ eventId: 'evt_123', amount: 5000 });
139
+ });
140
+ ```
141
+
142
+ ## What to validate
143
+
144
+ - webhook verification success and failure
145
+ - filter behavior
146
+ - idempotency key behavior when defined
147
+ - polling response shape
148
+ - transform correctness from trigger payload to workflow input
@@ -0,0 +1,52 @@
1
+ ---
2
+ name: keystroke-workflow-as-tool-debugging
3
+ description: Debug Keystroke workflows used as agent tools. Use when investigating workflow-tool eligibility, yield/resume behavior, agent_continue, agent_resume, refs, credential-required yields, hooks, child workflow runs, or manifest metadata.
4
+ ---
5
+
6
+ # Keystroke Workflow-As-Tool Debugging
7
+
8
+ Use this skill when a workflow tool misbehaves, yields unexpectedly, fails to resume, returns a ref unexpectedly, or shows confusing manifest metadata.
9
+
10
+ Related skills:
11
+ - use `../keystroke-agent-authoring/SKILL.md` for authored agent code
12
+ - use `../keystroke-workflow-authoring/SKILL.md` for authored workflow code
13
+ - use `../keystroke-data-toolkit/SKILL.md` for refs and large outputs
14
+ - use `../keystroke-cli-workspace/SKILL.md` for live CLI command usage
15
+
16
+ ## Debugging Model
17
+
18
+ A workflow listed on `Agent.tools` is enriched at build time, exposed in the agent manifest, and dispatched through host calls at runtime.
19
+
20
+ Key runtime paths:
21
+ - sync workflow tool: child workflow completes and returns a normal tool result
22
+ - yield workflow tool: child workflow suspends, agent run stores yield state, and `agent_continue` resumes later
23
+ - snapshot workflow tool: `midSessionSnapshot: true` records snapshot metadata and `agent_resume` resumes through scoped conversation-log replay
24
+ - large output: workflow result may become a ref envelope and be inspected through data toolkit tools
25
+
26
+ Current Phase D snapshot behavior is Path B prompt replay (scoped conversation-log replay). Do not assume native Pi process restore or direct message-history injection exists.
27
+
28
+ ## Default Triage
29
+
30
+ 1. Inspect the built agent manifest: `keystroke agents inspect <agentId> --json`. Confirm `sourceKind`, invocation mode, workflow id, deployment metadata, credentials, `largeResultMode`, auto-injected tools, and `midSessionSnapshot`.
31
+ 2. Inspect the agent run: `keystroke runs inspect <arun_...> --json`. Check status, pending yield state, snapshot id, tool call id, child workflow run id, child run idempotency key, latest snapshot metadata, and recent agent events.
32
+ 3. Inspect the child workflow run: `keystroke runs inspect <wrun_...> --json`. Check terminal state, suspension event, hook token, missing credentials, output cap/ref behavior, or failure envelope.
33
+ 4. Check whether the result projector should enqueue `agent_continue` or `agent_resume`.
34
+ 5. Check logs/traces by `agent_run_id`, `tool_call_id`, `workflow_run_id`, and host-call kind.
35
+
36
+ ## Common Findings
37
+
38
+ - If the tool returned `pending: true`, the model should acknowledge and end the turn; it should not retry the same call in the same turn.
39
+ - If a hook yield is pending, the next user response should be mediated through `provide_workflow_response`.
40
+ - If a credential-required yield is pending, the user must connect the missing credential, and the workflow tool must then be retried.
41
+ - If a snapshot run is `suspended_snapshotted`, completion should enqueue `agent_resume`, not `agent_continue`.
42
+ - If the output is too large and `largeResultMode` is not `ref`, expect a cap rejection.
43
+ - If a reducer call appears, expect validation guidance stating that reducers are unsupported; reducer support is deferred.
44
+
45
+ ## Snapshot Replay Notes
46
+
47
+ `keystroke runs inspect <arun_...>` reports `replayModel: "conversation-log"` and latest snapshot metadata when present. Synthetic snapshot storage keys may use an `agent-snapshot://...` URI, but that is only a storage/debug identifier for captured conversation-log replay data. It is not a native Pi process snapshot, and operators should not describe it as native restore.
48
+
49
+ ## References
50
+
51
+ Read these files as needed:
52
+ - `references/playbook.md` for a focused workflow-tool debugging checklist
@@ -0,0 +1,23 @@
1
+ {
2
+ "skill_name": "keystroke-workflow-as-tool-debugging",
3
+ "evals": [
4
+ {
5
+ "id": 1,
6
+ "prompt": "A workflow tool with ctx.wait returned pending: true and the agent tried to call it again. What should I inspect and what behavior is expected?",
7
+ "expected_output": "Explains yield receipt behavior, turn ending, idempotency, pending yield state, child workflow run, and agent_continue resume path.",
8
+ "files": []
9
+ },
10
+ {
11
+ "id": 2,
12
+ "prompt": "My midSessionSnapshot workflow tool is suspended_snapshotted. When the child workflow completes, which worker should resume it?",
13
+ "expected_output": "States that agent_resume handles current snapshots through Path B conversation-log replay and does not claim native Pi process restore.",
14
+ "files": []
15
+ },
16
+ {
17
+ "id": 3,
18
+ "prompt": "A workflow tool returned a ref and the model wants to query it with a reducer. What should I do?",
19
+ "expected_output": "Recommends describe_ref/read_ref/slice_ref with bounded ranges and states reducers/DuckDB are deferred and unsupported.",
20
+ "files": []
21
+ }
22
+ ]
23
+ }
@@ -0,0 +1,77 @@
1
+ # Workflow-As-Tool Debugging Playbook
2
+
3
+ Read this file when debugging a concrete workflow-tool failure.
4
+
5
+ ## Manifest Checklist
6
+
7
+ Verify the tool entry:
8
+ - `id` is the expected tool id and unique within the agent
9
+ - `sourceKind` is `workflow`
10
+ - workflow deployment metadata points at the intended deployed workflow
11
+ - invocation mode matches the workflow: sync, yield, or snapshot-enabled yield
12
+ - credential sets are classified as required or conditional as expected
13
+ - `largeResultMode` matches the workflow's output expectations
14
+ - auto-injected tools include data toolkit tools for workflow/ref-eligible tools
15
+ - yield companion tools appear when yield-mode tools are present
16
+ - `midSessionSnapshot` appears only for workflows that explicitly opt in
17
+
18
+ ## Runtime Checklist
19
+
20
+ Start with:
21
+
22
+ ```bash
23
+ keystroke runs inspect <arun_...> --json
24
+ ```
25
+
26
+ For the agent run:
27
+ - status should explain the active path: running, suspended yield, suspended snapshot, failed, cancelled, expired, or completed
28
+ - pending yield state should include tool call id, child run id, turn index, kind, and snapshot id when applicable
29
+ - child workflow idempotency should flow through `workflow_runs.parent_idempotency_key`
30
+ - latest snapshot metadata should say `piSessionStateKind: "conversation-log"` and `replayModel: "conversation-log"` when Path B replay is active
31
+ - no code should rely on `agent_tool_call_results`
32
+ - no code should rely on `agent_runs.pinned_workflow_deployments`
33
+
34
+ For the child workflow run:
35
+
36
+ ```bash
37
+ keystroke runs inspect <wrun_...> --json
38
+ ```
39
+
40
+ - confirm whether it completed, failed, cancelled, hit a wait, hit a hook, or needed credentials
41
+ - confirm whether output was returned inline, rejected by the cap, or stored as a ref
42
+ - for ref outputs, verify reads are scoped to the same agent run and bounded
43
+
44
+ ## Continue vs Resume
45
+
46
+ Use `agent_continue` for normal Phase C yield rehydration.
47
+
48
+ Use `agent_resume` only when the parent agent run has a current snapshot. The active Phase D implementation uses captured conversation-log snapshot events, pending host-call metadata, and a synthetic resolved tool result in a scoped replay prompt.
49
+
50
+ Do not claim that native Pi process restore exists. Native restore remains a future replacement if Pi exposes save/restore or message-history injection APIs.
51
+
52
+ The synthetic `agent-snapshot://...` storage key names a captured conversation-log replay artifact. It does not mean a native Pi session snapshot exists.
53
+
54
+ ## Large Output Issues
55
+
56
+ If a workflow returns too much data:
57
+ - with `largeResultMode: 'reject'`, the cap rejection is expected
58
+ - with `largeResultMode: 'ref'`, expect a ref envelope
59
+ - use `describe_ref` before `read_ref`
60
+ - when `read_ref` returns `truncated: true`, narrow the range
61
+ - do not add reducer tools or DuckDB to solve this unless reducers are explicitly reopened
62
+
63
+ ## Credential Issues
64
+
65
+ Required credentials should be aggregated during build/enrichment and block deploy when unavailable.
66
+
67
+ Optional credentials should use literal guards:
68
+
69
+ ```ts
70
+ if (ctx.hasCredentialSet('slack')) {
71
+ await sendSlackMessage.run({ channel, text });
72
+ }
73
+ ```
74
+
75
+ Literal `ctx.hasCredentialSet('id')` guards classify matching credentials as conditional. Dynamic ids are not recognized.
76
+
77
+ Credential-required yields should guide the user to connect credentials, then retry the workflow tool after connection.
@@ -0,0 +1,234 @@
1
+ ---
2
+ name: keystroke-workflow-authoring
3
+ description: Build Keystroke workflows and steps with the public @keystrokehq/core API. Use when the user wants to author, refactor, explain, or test workflow code or step code, including replay-safe orchestration, waits, hooks, child workflows, workflowGlobals, and workflow entry through triggers.
4
+ ---
5
+
6
+ # Keystroke Workflow Authoring
7
+
8
+ Use this skill when an agent needs to write or change Keystroke workflow code.
9
+
10
+ Keep this skill focused on authored workflow and step code:
11
+ - use `../keystroke-trigger-authoring/SKILL.md` for trigger authoring
12
+ - use `../keystroke-task-authoring/SKILL.md` for trigger-driven agent tasks
13
+ - use `../keystroke-agent-authoring/SKILL.md` for agent and tool authoring
14
+ - use `../keystroke-data-toolkit/SKILL.md` for large workflow-tool outputs and refs
15
+ - use `../keystroke-credential-binding/SKILL.md` for credential design and binding
16
+ - use `../keystroke-cli-workspace/SKILL.md` for project setup, build, deploy, and logs
17
+
18
+ ## Quick start
19
+
20
+ Author one exported primitive per typed file, as in the `fetch-account.step.ts` and `account-sync.workflow.ts` examples below.
21
+
22
+ `fetch-account.step.ts`
23
+
24
+ ```ts
25
+ import { Step } from '@keystrokehq/core';
26
+ import { z } from 'zod';
27
+
28
+ export const fetchAccount = new Step({
29
+ id: 'fetch_account',
30
+ name: 'Fetch Account',
31
+ description: 'Loads the current account status.',
32
+ input: z.object({
33
+ accountId: z.string(),
34
+ }),
35
+ output: z.object({
36
+ accountId: z.string(),
37
+ status: z.enum(['active', 'paused']),
38
+ }),
39
+ run: async (input) => ({
40
+ accountId: input.accountId,
41
+ status: 'active',
42
+ }),
43
+ });
44
+ ```
45
+
46
+ `account-sync.workflow.ts`
47
+
48
+ ```ts
49
+ import { Workflow } from '@keystrokehq/core';
50
+ import { z } from 'zod';
51
+ import { fetchAccount } from './fetch-account.step';
52
+
53
+ export const accountSyncWorkflow = new Workflow({
54
+ id: 'account-sync',
55
+ name: 'Account Sync',
56
+ description: 'Checks an account and returns a replay-safe summary.',
57
+ input: z.object({
58
+ accountId: z.string(),
59
+ waitForApproval: z.boolean().default(false),
60
+ }),
61
+ output: z.object({
62
+ accountId: z.string(),
63
+ status: z.enum(['active', 'paused']),
64
+ workflowId: z.string(),
65
+ }),
66
+ workflowGlobals: z.object({
67
+ tenantId: z.string(),
68
+ }),
69
+ run: async (input, ctx) => {
70
+ const account = await fetchAccount.run({ accountId: input.accountId });
71
+
72
+ if (input.waitForApproval) {
73
+ const approval = ctx.createHook('approval');
74
+ await approval;
75
+ }
76
+
77
+ await ctx.wait('1m');
78
+
79
+ return {
80
+ accountId: account.accountId,
81
+ status: account.status,
82
+ workflowId: ctx.workflowId,
83
+ };
84
+ },
85
+ });
86
+ ```
87
+
88
+ ## Authoring model
89
+
90
+ Teach this mental model clearly:
91
+ - `Workflow` is the orchestration boundary
92
+ - `Step` is the workflow-facing alias for `Operation`
93
+ - `Workflow.run(...)` coordinates steps, child workflows, waits, hooks, and agents
94
+ - `Step.run(...)` does low-level operational work and returns typed output
95
+ - triggers are listed in `new Workflow({ triggers: [...] })`; call a trigger with `{ transform }` to bind payload mapping
96
+ - tasks are different: use `Task` when the job is “trigger -> prompt -> agent run”
97
+
98
+ `Step`, `Tool`, and `Operation` are the same class. In this skill, default to `Step` because that matches workflow author language.
99
+
100
+ Every `Step` / `Tool` / `Operation` must include a stable `id`. Use `name` for the human display label and `id` for the stable tool/step identity.
101
+
102
+ ## Runtime boundary
103
+
104
+ Workflows are for authored TypeScript orchestration code.
105
+
106
+ Teach these rules:
107
+ - a workflow cannot run bash commands as part of its authoring model
108
+ - a step is not a shell task runner
109
+ - do not teach workflows as the place to run Python, `pnpm`, or arbitrary binaries
110
+ - if the job needs shell execution, persistent filesystem state, or sandbox setup, use an agent instead
111
+
112
+
113
+ ## Default workflow process
114
+
115
+ 1. Define the workflow input, output, and stable `id`.
116
+ 2. Split the automation into orchestration and units of work.
117
+ 3. Look for an existing integration operation before writing a custom step.
118
+ 4. Put orchestration in the workflow:
119
+ - branching
120
+ - loops
121
+ - `Promise.all`
122
+ - payload shaping between boundaries
123
+ - waits and hooks
124
+ 5. Put operational work in steps, shared operations, or agents.
125
+ 6. Keep each exported primitive in its own typed file.
126
+ 7. Finish with tests using `@keystrokehq/core/vitest`.
127
+
128
+ ## Workflow rules
129
+
130
+ - Keep `Workflow.run(...)` replay-safe and deterministic.
131
+ - Do not call external systems directly from the workflow body when a step or agent boundary is more appropriate.
132
+ - Do not use `Math.random()`, `Date.now()`, direct env access, or changing mutable state inside the workflow body unless the value comes from input or runtime context.
133
+ - Do not call one step from inside another step.
134
+ - Do not call an agent from inside a step.
135
+ - Keep steps low-level and reusable.
136
+ - Use `workflowGlobals` for typed workflow-wide runtime values.
137
+ - Use `CredentialSet` for secrets and integration auth.
138
+ - Do not use `process.env` in authored workflow or step code.
139
+ - Follow Zod v4 syntax in examples and authored code. See `../../../.agents/rules/zod-v4-requirements.md`.
140
+ - Workflows can be registered as agent tools. Sync workflows return inline results; suspending workflows yield and resume later.
141
+ - Add `largeResultMode: 'ref'` when a workflow tool may return large data; agents inspect refs with `describe_ref`, `read_ref`, and `slice_ref`.
142
+ - Add `midSessionSnapshot: true` only for workflow tools that have measured benefit from preserving mid-tool-call reasoning state. The default turn-boundary yield path is simpler and should remain the default.
143
+ - Use `ctx.hasCredentialSet('credentialId')` to wrap genuinely optional credential-dependent branches. Literal guards are recognized by build analysis and classify matching credentials as conditional.
144
+
145
+ ## Workflows as agent tools
146
+
147
+ Agents can list workflows directly in `tools`. Build enrichment classifies each workflow from its flow graph, aggregates credentials, pins the deployed workflow in manifest metadata, and injects companion tools when needed.
148
+
149
+ Teach these behaviors:
150
+ - non-suspending workflow tools return normal tool results
151
+ - workflows with `ctx.wait`, `ctx.createHook`, missing credentials, or suspending child work yield with a `pending: true` receipt and resume later through `agent_continue`
152
+ - `midSessionSnapshot: true` opts a workflow into the Phase D snapshot path; current implementation uses conversation-log replay, not native Pi process restore
153
+ - credential-required yields ask the user to connect missing credentials, then retry the workflow tool
154
+ - hook yields ask the user for the requested decision/input, then continue the workflow
155
+
156
+ When adding hook or wait points that may run as agent tools, include intent and reason metadata so the platform can explain the suspension clearly:
157
+
158
+ ```ts
159
+ await ctx.wait('30m', {
160
+ intent: 'data-settlement',
161
+ reason: 'The provider settles events before final reconciliation.',
162
+ });
163
+
164
+ const approval = ctx.createHook('approval', {
165
+ intent: 'human-decision',
166
+ prompt: 'Approve refund above the policy threshold?',
167
+ reason: 'Refunds above policy require manual approval.',
168
+ });
169
+ ```
170
+
171
+ ## Optional credentials
172
+
173
+ Use `ctx.hasCredentialSet(id)` only when the workflow is intentionally useful without that credential. Keep the id literal so build analysis can classify the branch:
174
+
175
+ ```ts
176
+ if (ctx.hasCredentialSet('slack')) {
177
+ await sendSlackMessage.run({ channel: input.channel, text: input.summary });
178
+ }
179
+ ```
180
+
181
+ Do not use this helper to hide required credentials. If the workflow cannot complete without a credential, let normal required credential aggregation or credential-required yield handle it.
182
+
183
+ ## Agent Guidelines for Custom Operations & Workflows
184
+
185
+ When an agent needs to write custom steps, operations, tools, or workflows, it must follow these rules:
186
+ 1. **Always use prebuilt steps/operations** if they exist before writing custom ones.
187
+ 2. **Collect context first**: Do you have all the information you need from the user? If not, ask the user to clarify what they are looking for. **Do Not Guess**.
188
+ 3. **Understand API payloads**: If using an API endpoint to fetch data, search the provider's docs to understand the payloads. If possible, hit available endpoints to inspect the actual payloads.
189
+ 4. **Always write and run tests**: You must always write tests for new workflows and for custom steps/operations/tools. Always run the tests to confirm that the new steps and workflows run (see `references/testing.md`).
190
+ 5. **Handle missing credentials**: If you cannot run tests because of missing credentials, ask the user to configure them following the `../keystroke-credential-binding/SKILL.md` skill. The user will need to upload credentials before deploying anyway.
191
+
192
+ ## Manual API Execution
193
+
194
+ Every workflow can be executed on-demand via the API without requiring a trigger. Triggers are only required for automated entry (webhooks, schedules, provider events).
195
+
196
+ To explicitly start a workflow via API:
197
+ - **Endpoint**: `POST /api/v1/workflows/execute`
198
+ - **Body**:
199
+ - `projectId`: The Keystroke project ID
200
+ - `authoredWorkflowId`: The stable ID defined in `new Workflow({ id: ... })`
201
+ - `args`: Array containing the input payload (e.g. `[ { "accountId": "123" } ]`)
202
+ - `workflowGlobals`: (Optional) Object matching the workflow's globals schema
203
+ - `credentialBindings`: (Optional) Custom credential bindings for the execution
204
+
205
+ ## When to choose a workflow
206
+
207
+ Choose a workflow when the automation needs:
208
+ - durable orchestration across multiple steps
209
+ - waits or human approval hooks
210
+ - branching and composition
211
+ - child workflows
212
+ - a replay-safe control flow that coordinates steps and agents
213
+
214
+ Do not choose a workflow when the real job is:
215
+ - one prompt-driven agent run from a trigger
216
+ - a long-lived conversation entrypoint
217
+ - shell-heavy filesystem work better suited to a sandboxed agent
218
+
219
+ ## Testing path
220
+
221
+ Default testing guidance:
222
+ - use `keystrokeTestPlugin()` in Vitest config
223
+ - use `createTestRuntime()` when a workflow needs explicit runtime values
224
+ - use `createTestStepContext()` for isolated step tests
225
+ - test trigger-to-workflow mapping by creating a bound trigger and calling `bound.transform?.(payload)`
226
+
227
+ ## References
228
+
229
+ Read these files as needed:
230
+ - `references/source-map.md` for the public workflow and step surface
231
+ - `references/patterns.md` for field-by-field workflow and step examples
232
+ - `references/runtime-helpers.md` for `ctx.wait`, hooks, globals, workflow ids, and runtime boundaries
233
+ - `references/prebuilt-integrations.md` for current integration operations to import before writing custom steps
234
+ - `references/testing.md` for public testing examples
@@ -0,0 +1,29 @@
1
+ {
2
+ "skill_name": "keystroke-workflow-authoring",
3
+ "evals": [
4
+ {
5
+ "id": 1,
6
+ "prompt": "I'm building a Keystroke workflow that checks a few upstream APIs, waits 10 minutes between retries, and then posts the result to Slack. Can you show me how to structure the workflow and what should be steps versus workflow orchestration?",
7
+ "expected_output": "Explains workflow planning, step boundaries, replay-safe orchestration, the durable wait path, and where Slack or other external work belongs.",
8
+ "files": []
9
+ },
10
+ {
11
+ "id": 2,
12
+ "prompt": "I have a workflow that uses Math.random() and Date.now() inside Workflow.run to make request ids. Why is that a problem in Keystroke, and how should I rewrite it?",
13
+ "expected_output": "Explains workflow replay safety, why nondeterministic logic should not live in the workflow body, and how to move it into a step or agent boundary.",
14
+ "files": []
15
+ },
16
+ {
17
+ "id": 3,
18
+ "prompt": "How do I test a Keystroke workflow that uses workflowGlobals and a webhook trigger? I want the recommended testing path, not a random custom harness.",
19
+ "expected_output": "Uses core Vitest helpers, covers workflowGlobals, and points to bound trigger transform testing via bound.transform?.(payload).",
20
+ "files": []
21
+ },
22
+ {
23
+ "id": 4,
24
+ "prompt": "Can my Keystroke workflow run bash to call a Python script, or should I structure this differently?",
25
+ "expected_output": "Explains that workflows are authored as TypeScript orchestration and do not run bash as part of the workflow model, then routes shell-heavy work to an agent sandbox.",
26
+ "files": []
27
+ }
28
+ ]
29
+ }