npm - @keystrokehq/skills - Versions diffs - 0.0.1 - Mend

@keystrokehq/skills 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

package/AGENTS-blurb.md +123 -0
package/LICENSE +21 -0
package/README.md +63 -0
package/keystroke-agent-authoring/SKILL.md +225 -0
package/keystroke-agent-authoring/evals/evals.json +29 -0
package/keystroke-agent-authoring/references/messaging-gateways.md +242 -0
package/keystroke-agent-authoring/references/patterns.md +417 -0
package/keystroke-agent-authoring/references/prebuilt-integrations.md +879 -0
package/keystroke-agent-authoring/references/sandbox-and-mcp.md +214 -0
package/keystroke-agent-authoring/references/source-map.md +182 -0
package/keystroke-agent-authoring/references/testing.md +85 -0
package/keystroke-cli-workspace/SKILL.md +93 -0
package/keystroke-cli-workspace/evals/evals.json +23 -0
package/keystroke-cli-workspace/references/command-map.md +50 -0
package/keystroke-cli-workspace/references/credentials-and-connect.md +79 -0
package/keystroke-cli-workspace/references/project-lifecycle.md +85 -0
package/keystroke-credential-binding/SKILL.md +509 -0
package/keystroke-credential-binding/evals/evals.json +29 -0
package/keystroke-credential-binding/references/cli.md +85 -0
package/keystroke-credential-binding/references/patterns.md +878 -0
package/keystroke-credential-binding/references/source-map.md +69 -0
package/keystroke-data-toolkit/SKILL.md +59 -0
package/keystroke-data-toolkit/evals/evals.json +23 -0
package/keystroke-data-toolkit/references/usage.md +79 -0
package/keystroke-task-authoring/SKILL.md +124 -0
package/keystroke-task-authoring/evals/evals.json +23 -0
package/keystroke-task-authoring/references/patterns.md +132 -0
package/keystroke-task-authoring/references/source-map.md +61 -0
package/keystroke-trigger-authoring/SKILL.md +189 -0
package/keystroke-trigger-authoring/evals/evals.json +29 -0
package/keystroke-trigger-authoring/references/patterns.md +265 -0
package/keystroke-trigger-authoring/references/source-map.md +128 -0
package/keystroke-trigger-authoring/references/testing.md +148 -0
package/keystroke-workflow-as-tool-debugging/SKILL.md +52 -0
package/keystroke-workflow-as-tool-debugging/evals/evals.json +23 -0
package/keystroke-workflow-as-tool-debugging/references/playbook.md +77 -0
package/keystroke-workflow-authoring/SKILL.md +234 -0
package/keystroke-workflow-authoring/evals/evals.json +29 -0
package/keystroke-workflow-authoring/references/patterns.md +265 -0
package/keystroke-workflow-authoring/references/prebuilt-integrations.md +811 -0
package/keystroke-workflow-authoring/references/runtime-helpers.md +264 -0
package/keystroke-workflow-authoring/references/source-map.md +108 -0
package/keystroke-workflow-authoring/references/testing.md +108 -0
package/package.json +26 -0

package/keystroke-trigger-authoring/references/testing.md ADDED Viewed

@@ -0,0 +1,148 @@
+# Trigger Testing
+Read this file when the user asks how to test a Keystroke trigger.
+Assume `paymentWebhook`, `orderPolling`, and `paymentWorkflow` are the public trigger and workflow instances shown elsewhere in this skill.
+## Vitest setup
+`keystrokeTestPlugin()` adds the core test setup file to Vitest, which is required for credential resolution and context mocking.
+```ts
+import { defineConfig } from 'vitest/config';
+import { keystrokeTestPlugin } from '@keystrokehq/core/vitest';
+export default defineConfig({
+  plugins: [keystrokeTestPlugin()],
+});
+```
+## Test webhook verification and filtering
+```ts
+const request = {
+  headers: {
+    'x-signature': 'signed',
+  },
+  query: {},
+  rawBody: JSON.stringify({
+    id: 'evt_123',
+    type: 'payment.completed',
+    amount: 5000,
+  }),
+  method: 'POST',
+  path: '/payments',
+};
+await paymentWebhook.verify?.(request, {
+  credentials: {},
+  triggerName: paymentWebhook.name,
+  triggerType: 'webhook',
+});
+const payload = paymentWebhook.payload.parse(JSON.parse(request.rawBody));
+const shouldRun = paymentWebhook.filter?.(payload, request);
+```
+This isolates the webhook-only concerns:
+- authenticity checks in `verify`
+- parsing via `trigger.payload.parse(JSON.parse(request.rawBody))`
+- event gating in `filter`
+## Test polling behavior
+```ts
+const response = await orderPolling.poll({
+  credentials: {},
+  triggerName: orderPolling.name,
+  triggerType: 'polling',
+  lastPolledAt: new Date().toISOString(),
+  lastResponse: {
+    orderId: 'order_123',
+    status: 'created',
+  },
+});
+const payload = orderPolling.parseResponse(response);
+```
+This isolates the polling-only concerns:
+- what `poll(...)` returns
+- how prior state affects the next poll
+- whether the response matches the trigger schema
+## Test bound trigger transform
+Create a bound trigger by calling the trigger as a function with `{ transform }`, then call `bound.transform?.(payload)` to test the mapping:
+```ts
+const bound = paymentWebhook({
+  transform: (payload) => ({
+    eventId: payload.id,
+    amount: payload.amount,
+  }),
+});
+const workflowInput = bound.transform?.(
+  { id: 'evt_123', type: 'payment.completed', amount: 5000 },
+);
+expect(workflowInput).toEqual({ eventId: 'evt_123', amount: 5000 });
+```
+This tests only the mapping from trigger payload to workflow input.
+## Test bound trigger transform with request context
+```ts
+const bound = paymentWebhook({
+  transform: (payload, request) => ({
+    eventId: payload.id,
+    amount: payload.amount,
+  }),
+});
+const workflowInput = bound.transform?.(
+  { id: 'evt_123', type: 'payment.completed', amount: 5000 },
+  request,
+);
+```
+Pass the optional second argument when the transform depends on webhook request data (headers, query params, etc.). Note that only `verify` receives credentials — `transform`, `filter`, and `idempotencyKey` do not.
+## Full webhook-to-workflow test
+```ts
+it('maps a valid webhook event into workflow input', async () => {
+  const request = mockRequest({
+    headers: { 'x-signature': 'signed' },
+    rawBody: JSON.stringify({ id: 'evt_123', type: 'payment.completed', amount: 5000 }),
+  });
+  await paymentWebhook.verify?.(request, {
+    credentials: {},
+    triggerName: paymentWebhook.name,
+    triggerType: 'webhook',
+  });
+  const payload = paymentWebhook.payload.parse(JSON.parse(request.rawBody));
+  const passed = paymentWebhook.filter?.(payload, request);
+  expect(passed).toBe(true);
+  const bound = paymentWebhook({
+    transform: (p) => ({ eventId: p.id, amount: p.amount }),
+  });
+  const workflowInput = bound.transform?.(payload);
+  expect(workflowInput).toEqual({ eventId: 'evt_123', amount: 5000 });
+});
+```
+## What to validate
+- webhook verification success and failure
+- filter behavior
+- idempotency key behavior when defined
+- polling response shape
+- transform correctness from trigger payload to workflow input

package/keystroke-workflow-as-tool-debugging/SKILL.md ADDED Viewed

@@ -0,0 +1,52 @@
+---
+name: keystroke-workflow-as-tool-debugging
+description: Debug Keystroke workflows used as agent tools. Use when investigating workflow-tool eligibility, yield/resume behavior, agent_continue, agent_resume, refs, credential-required yields, hooks, child workflow runs, or manifest metadata.
+---
+# Keystroke Workflow-As-Tool Debugging
+Use this skill when a workflow tool misbehaves, yields unexpectedly, fails to resume, returns a ref unexpectedly, or shows confusing manifest metadata.
+Related skills:
+- use `../keystroke-agent-authoring/SKILL.md` for authored agent code
+- use `../keystroke-workflow-authoring/SKILL.md` for authored workflow code
+- use `../keystroke-data-toolkit/SKILL.md` for refs and large outputs
+- use `../keystroke-cli-workspace/SKILL.md` for live CLI command usage
+## Debugging Model
+A workflow listed on `Agent.tools` is enriched at build time, exposed in the agent manifest, and dispatched through host calls at runtime.
+Key runtime paths:
+- sync workflow tool: child workflow completes and returns a normal tool result
+- yield workflow tool: child workflow suspends, agent run stores yield state, and `agent_continue` resumes later
+- snapshot workflow tool: `midSessionSnapshot: true` records snapshot metadata and `agent_resume` resumes through scoped conversation-log replay
+- large output: workflow result may become a ref envelope and be inspected through data toolkit tools
+Current Phase D snapshot behavior is Path B: prompt replay of the captured conversation log. Do not assume that native Pi process restore or direct message-history injection exists.
+## Default Triage
+1. Inspect the built agent manifest: `keystroke agents inspect <agentId> --json`. Confirm `sourceKind`, invocation mode, workflow id, deployment metadata, credentials, `largeResultMode`, auto-injected tools, and `midSessionSnapshot`.
+2. Inspect the agent run: `keystroke runs inspect <arun_...> --json`. Check status, pending yield state, snapshot id, tool call id, child workflow run id, child run idempotency key, latest snapshot metadata, and recent agent events.
+3. Inspect the child workflow run: `keystroke runs inspect <wrun_...> --json`. Check terminal state, suspension event, hook token, missing credentials, output cap/ref behavior, or failure envelope.
+4. Check whether the result projector should enqueue `agent_continue` or `agent_resume`.
+5. Check logs/traces by `agent_run_id`, `tool_call_id`, `workflow_run_id`, and host-call kind.
+## Common Findings
+- If the tool returned `pending: true`, the model should acknowledge and end the turn; it should not retry the same call in the same turn.
+- If a hook yield is pending, the next user response should be mediated through `provide_workflow_response`.
+- If a credential-required yield is pending, the user must connect the missing credential and then retry.
+- If a snapshot run is `suspended_snapshotted`, completion should enqueue `agent_resume`, not `agent_continue`.
+- If the output is too large and `largeResultMode` is not `ref`, expect a cap rejection.
+- If a reducer call appears, expect validation guidance saying it is unsupported; reducers are deferred.
+## Snapshot Replay Notes
+`keystroke runs inspect <arun_...>` reports `replayModel: "conversation-log"` and latest snapshot metadata when present. Synthetic snapshot storage keys may use an `agent-snapshot://...` URI, but that is only a storage/debug identifier for captured conversation-log replay data. It is not a native Pi process snapshot, and operators should not describe it as native restore.
+## References
+Read these files as needed:
+- `references/playbook.md` for a focused workflow-tool debugging checklist

package/keystroke-workflow-as-tool-debugging/evals/evals.json ADDED Viewed

@@ -0,0 +1,23 @@
+{
+  "skill_name": "keystroke-workflow-as-tool-debugging",
+  "evals": [
+    {
+      "id": 1,
+      "prompt": "A workflow tool with ctx.wait returned pending: true and the agent tried to call it again. What should I inspect and what behavior is expected?",
+      "expected_output": "Explains yield receipt behavior, turn ending, idempotency, pending yield state, child workflow run, and agent_continue resume path.",
+      "files": []
+    },
+    {
+      "id": 2,
+      "prompt": "My midSessionSnapshot workflow tool is suspended_snapshotted. When the child workflow completes, which worker should resume it?",
+      "expected_output": "States that agent_resume handles current snapshots through Path B conversation-log replay and does not claim native Pi process restore.",
+      "files": []
+    },
+    {
+      "id": 3,
+      "prompt": "A workflow tool returned a ref and the model wants to query it with a reducer. What should I do?",
+      "expected_output": "Recommends describe_ref/read_ref/slice_ref with bounded ranges and states reducers/DuckDB are deferred and unsupported.",
+      "files": []
+    }
+  ]
+}

package/keystroke-workflow-as-tool-debugging/references/playbook.md ADDED Viewed

@@ -0,0 +1,77 @@
+# Workflow-As-Tool Debugging Playbook
+Read this file when debugging a concrete workflow-tool failure.
+## Manifest Checklist
+Verify the tool entry:
+- `id` is the expected tool id and unique within the agent
+- `sourceKind` is `workflow`
+- workflow deployment metadata points at the intended deployed workflow
+- invocation mode matches the workflow: sync, yield, or snapshot-enabled yield
+- credential sets are classified as required or conditional as expected
+- `largeResultMode` matches the workflow's output expectations
+- auto-injected tools include data toolkit tools for workflow/ref-eligible tools
+- yield companion tools appear when yield-mode tools are present
+- `midSessionSnapshot` appears only for workflows that explicitly opt in
+## Runtime Checklist
+Start with:
+```bash
+keystroke runs inspect <arun_...> --json
+```
+For the agent run:
+- status should explain the active path: running, suspended yield, suspended snapshot, failed, cancelled, expired, or completed
+- pending yield state should include tool call id, child run id, turn index, kind, and snapshot id when applicable
+- child workflow idempotency should flow through `workflow_runs.parent_idempotency_key`
+- latest snapshot metadata should say `piSessionStateKind: "conversation-log"` and `replayModel: "conversation-log"` when Path B replay is active
+- no code should rely on `agent_tool_call_results`
+- no code should rely on `agent_runs.pinned_workflow_deployments`
+For the child workflow run:
+```bash
+keystroke runs inspect <wrun_...> --json
+```
+- confirm whether it completed, failed, cancelled, hit a wait, hit a hook, or needed credentials
+- confirm whether output was returned inline, rejected by the cap, or stored as a ref
+- for ref outputs, verify reads are scoped to the same agent run and bounded
+## Continue vs Resume
+Use `agent_continue` for normal Phase C yield rehydration.
+Use `agent_resume` only when the parent agent run has a current snapshot. The active Phase D implementation uses captured conversation-log snapshot events, pending host-call metadata, and a synthetic resolved tool result in a scoped replay prompt.
+Do not claim that native Pi process restore exists. Native restore remains a future replacement if Pi exposes save/restore or message-history injection APIs.
+The synthetic `agent-snapshot://...` storage key names a captured conversation-log replay artifact. It does not mean a native Pi session snapshot exists.
+## Large Output Issues
+If a workflow returns too much data:
+- with `largeResultMode: 'reject'`, the cap rejection is expected
+- with `largeResultMode: 'ref'`, expect a ref envelope
+- use `describe_ref` before `read_ref`
+- when `read_ref` returns `truncated: true`, narrow the range
+- do not add reducer tools or DuckDB to solve this unless reducers are explicitly reopened
+## Credential Issues
+Required credentials should be aggregated during build/enrichment and block deploy when unavailable.
+Optional credentials should use literal guards:
+```ts
+if (ctx.hasCredentialSet('slack')) {
+  await sendSlackMessage.run({ channel, text });
+}
+```
+Literal `ctx.hasCredentialSet('id')` guards classify matching credentials as conditional. Dynamic ids are not recognized.
+Credential-required yields should guide the user to connect credentials, then retry the workflow tool after connection.

package/keystroke-workflow-authoring/SKILL.md ADDED Viewed

@@ -0,0 +1,234 @@
+---
+name: keystroke-workflow-authoring
+description: Build Keystroke workflows and steps with the public @keystrokehq/core API. Use when the user wants to author, refactor, explain, or test workflow code or step code, including replay-safe orchestration, waits, hooks, child workflows, workflowGlobals, and workflow entry through triggers.
+---
+# Keystroke Workflow Authoring
+Use this skill when an agent needs to write or change Keystroke workflow code.
+Keep this skill focused on authored workflow and step code:
+- use `../keystroke-trigger-authoring/SKILL.md` for trigger authoring
+- use `../keystroke-task-authoring/SKILL.md` for trigger-driven agent tasks
+- use `../keystroke-agent-authoring/SKILL.md` for agent and tool authoring
+- use `../keystroke-data-toolkit/SKILL.md` for large workflow-tool outputs and refs
+- use `../keystroke-credential-binding/SKILL.md` for credential design and binding
+- use `../keystroke-cli-workspace/SKILL.md` for project setup, build, deploy, and logs
+## Quick start
+Author one exported primitive per typed file.
+`fetch-account.step.ts`
+```ts
+import { Step } from '@keystrokehq/core';
+import { z } from 'zod';
+export const fetchAccount = new Step({
+  id: 'fetch_account',
+  name: 'Fetch Account',
+  description: 'Loads the current account status.',
+  input: z.object({
+    accountId: z.string(),
+  }),
+  output: z.object({
+    accountId: z.string(),
+    status: z.enum(['active', 'paused']),
+  }),
+  run: async (input) => ({
+    accountId: input.accountId,
+    status: 'active',
+  }),
+});
+```
+`account-sync.workflow.ts`
+```ts
+import { Workflow } from '@keystrokehq/core';
+import { z } from 'zod';
+import { fetchAccount } from './fetch-account.step';
+export const accountSyncWorkflow = new Workflow({
+  id: 'account-sync',
+  name: 'Account Sync',
+  description: 'Checks an account and returns a replay-safe summary.',
+  input: z.object({
+    accountId: z.string(),
+    waitForApproval: z.boolean().default(false),
+  }),
+  output: z.object({
+    accountId: z.string(),
+    status: z.enum(['active', 'paused']),
+    workflowId: z.string(),
+  }),
+  workflowGlobals: z.object({
+    tenantId: z.string(),
+  }),
+  run: async (input, ctx) => {
+    const account = await fetchAccount.run({ accountId: input.accountId });
+    if (input.waitForApproval) {
+      const approval = ctx.createHook('approval');
+      await approval;
+    }
+    await ctx.wait('1m');
+    return {
+      accountId: account.accountId,
+      status: account.status,
+      workflowId: ctx.workflowId,
+    };
+  },
+});
+```
+## Authoring model
+Teach this mental model clearly:
+- `Workflow` is the orchestration boundary
+- `Step` is the workflow-facing alias for `Operation`
+- `Workflow.run(...)` coordinates steps, child workflows, waits, hooks, and agents
+- `Step.run(...)` does low-level operational work and returns typed output
+- triggers are listed in `Workflow({ triggers: [...] })`; call a trigger with `{ transform }` to bind payload mapping
+- tasks are different: use `Task` when the job is “trigger -> prompt -> agent run”
+`Step`, `Tool`, and `Operation` are the same class. In this skill, default to `Step` because that matches workflow author language.
+Every `Step` / `Tool` / `Operation` must include a stable `id`. Use `name` for the human display label and `id` for the stable tool/step identity.
+## Runtime boundary
+Workflows are for authored TypeScript orchestration code.
+Teach these rules:
+- a workflow cannot run bash commands as part of its authoring model
+- a step is not a shell task runner
+- do not teach workflows as the place to run Python, `pnpm`, or arbitrary binaries
+- if the job needs shell execution, persistent filesystem state, or sandbox setup, use an agent instead
+## Default workflow process
+1. Define the workflow input, output, and stable `id`.
+2. Split the automation into orchestration and units of work.
+3. Look for an existing integration operation before writing a custom step.
+4. Put orchestration in the workflow:
+   - branching
+   - loops
+   - `Promise.all`
+   - payload shaping between boundaries
+   - waits and hooks
+5. Put operational work in steps, shared operations, or agents.
+6. Keep each exported primitive in its own typed file.
+7. Finish with tests using `@keystrokehq/core/vitest`.
+## Workflow rules
+- Keep `Workflow.run(...)` replay-safe and deterministic.
+- Do not call external systems directly from the workflow body when a step or agent boundary is more appropriate.
+- Do not use `Math.random()`, `Date.now()`, direct env access, or changing mutable state inside the workflow body unless the value comes from input or runtime context.
+- Do not call one step from inside another step.
+- Do not call an agent from inside a step.
+- Keep steps low-level and reusable.
+- Use `workflowGlobals` for typed workflow-wide runtime values.
+- Use `CredentialSet` for secrets and integration auth.
+- Do not use `process.env` in authored workflow or step code.
+- Follow Zod v4 syntax in examples and authored code. See `../../../.agents/rules/zod-v4-requirements.md`.
+- Workflows can be registered as agent tools. Sync workflows return inline results; suspending workflows yield and resume later.
+- Add `largeResultMode: 'ref'` when a workflow tool may return large data; agents inspect refs with `describe_ref`, `read_ref`, and `slice_ref`.
+- Add `midSessionSnapshot: true` only for workflow tools that have measured benefit from preserving mid-tool-call reasoning state. The default turn-boundary yield path is simpler and should remain the default.
+- Use `ctx.hasCredentialSet('credentialId')` to wrap genuinely optional credential-dependent branches. Literal guards are recognized by build analysis and classify matching credentials as conditional.
+## Workflows as agent tools
+Agents can list workflows directly in `tools`. Build enrichment classifies each workflow from its flow graph, aggregates credentials, pins the deployed workflow in manifest metadata, and injects companion tools when needed.
+Teach these behaviors:
+- non-suspending workflow tools return normal tool results
+- workflows with `ctx.wait`, `ctx.createHook`, missing credentials, or suspending child work yield with a `pending: true` receipt and resume later through `agent_continue`
+- `midSessionSnapshot: true` opts a workflow into the Phase D snapshot path; current implementation uses conversation-log replay, not native Pi process restore
+- credential-required yields ask the user to connect missing credentials, then retry the workflow tool
+- hook yields ask the user for the requested decision/input, then continue the workflow
+When adding hook or wait points that may run as agent tools, include intent and reason metadata so the platform can explain the suspension clearly:
+```ts
+await ctx.wait('30m', {
+  intent: 'data-settlement',
+  reason: 'The provider settles events before final reconciliation.',
+});
+const approval = ctx.createHook('approval', {
+  intent: 'human-decision',
+  prompt: 'Approve refund above the policy threshold?',
+  reason: 'Refunds above policy require manual approval.',
+});
+```
+## Optional credentials
+Use `ctx.hasCredentialSet(id)` only when the workflow is intentionally useful without that credential. Keep the id literal so build analysis can classify the branch:
+```ts
+if (ctx.hasCredentialSet('slack')) {
+  await sendSlackMessage.run({ channel: input.channel, text: input.summary });
+}
+```
+Do not use this helper to hide required credentials. If the workflow cannot complete without a credential, let normal required credential aggregation or credential-required yield handle it.
+## Agent Guidelines for Custom Operations & Workflows
+When an agent needs to write custom steps, operations, tools, or workflows, it must follow these rules:
+1. **Always use prebuilt steps/operations** if they exist before writing custom ones.
+2. **Collect context first**: Do you have all the information you need from the user? If not, ask the user to clarify what they are looking for. **Do Not Guess**.
+3. **Understand API payloads**: If using an API endpoint to fetch data, search the provider's docs to understand the payloads. If possible, hit available endpoints to inspect the actual payloads.
+4. **Always write and run tests**: You must always write tests for new workflows, and custom steps/operations/tools. Always run the tests to make sure that the new steps and workflows run (see `references/testing.md`).
+5. **Handle missing credentials**: If you cannot run tests because of missing credentials, ask the user to configure them following the `../keystroke-credential-binding/SKILL.md` skill. The user will need to upload credentials before deploying anyway.
+## Manual API Execution
+Every workflow can be executed on-demand via the API without requiring a trigger. Triggers are only required for automated entry (webhooks, schedules, provider events).
+To explicitly start a workflow via API:
+- **Endpoint**: `POST /api/v1/workflows/execute`
+- **Body**:
+  - `projectId`: The Keystroke project ID
+  - `authoredWorkflowId`: The stable ID defined in `new Workflow({ id: ... })`
+  - `args`: Array containing the input payload (e.g. `[ { "accountId": "123" } ]`)
+  - `workflowGlobals`: (Optional) Object matching the workflow's globals schema
+  - `credentialBindings`: (Optional) Custom credential bindings for the execution
+## When to choose a workflow
+Choose a workflow when the automation needs:
+- durable orchestration across multiple steps
+- waits or human approval hooks
+- branching and composition
+- child workflows
+- a replay-safe control flow that coordinates steps and agents
+Do not choose a workflow when the real job is:
+- one prompt-driven agent run from a trigger
+- a long-lived conversation entrypoint
+- shell-heavy filesystem work better suited to a sandboxed agent
+## Testing path
+Default testing guidance:
+- use `keystrokeTestPlugin()` in Vitest config
+- use `createTestRuntime()` when a workflow needs explicit runtime values
+- use `createTestStepContext()` for isolated step tests
+- test trigger-to-workflow mapping by creating a bound trigger and calling `bound.transform?.(payload)`
+## References
+Read these files as needed:
+- `references/source-map.md` for the public workflow and step surface
+- `references/patterns.md` for field-by-field workflow and step examples
+- `references/runtime-helpers.md` for `ctx.wait`, hooks, globals, workflow ids, and runtime boundaries
+- `references/prebuilt-integrations.md` for current integration operations to import before writing custom steps
+- `references/testing.md` for public testing examples

package/keystroke-workflow-authoring/evals/evals.json ADDED Viewed

@@ -0,0 +1,29 @@
+{
+  "skill_name": "keystroke-workflow-authoring",
+  "evals": [
+    {
+      "id": 1,
+      "prompt": "I'm building a Keystroke workflow that checks a few upstream APIs, waits 10 minutes between retries, and then posts the result to Slack. Can you show me how to structure the workflow and what should be steps versus workflow orchestration?",
+      "expected_output": "Explains workflow planning, step boundaries, replay-safe orchestration, the durable wait path, and where Slack or other external work belongs.",
+      "files": []
+    },
+    {
+      "id": 2,
+      "prompt": "I have a workflow that uses Math.random() and Date.now() inside Workflow.run to make request ids. Why is that a problem in Keystroke, and how should I rewrite it?",
+      "expected_output": "Explains workflow replay safety, why nondeterministic logic should not live in the workflow body, and how to move it into a step or agent boundary.",
+      "files": []
+    },
+    {
+      "id": 3,
+      "prompt": "How do I test a Keystroke workflow that uses workflowGlobals and a webhook trigger? I want the recommended testing path, not a random custom harness.",
+      "expected_output": "Uses core Vitest helpers, covers workflowGlobals, and points to bound trigger transform testing via bound.transform?.(payload).",
+      "files": []
+    },
+    {
+      "id": 4,
+      "prompt": "Can my Keystroke workflow run bash to call a Python script, or should I structure this differently?",
+      "expected_output": "Explains that workflows are authored as TypeScript orchestration and do not run bash as part of the workflow model, then routes shell-heavy work to an agent sandbox.",
+      "files": []
+    }
+  ]
+}