@m8i-51/shoal 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -12
- package/bin/init.js +82 -2
- package/framework/cost.ts +15 -0
- package/framework/llm-client.ts +28 -1
- package/framework/report.ts +23 -1
- package/package.json +2 -1
- package/run.ts +28 -3
- package/server/index.ts +23 -0
- package/server/runs.ts +6 -0
- package/server/scheduler.ts +65 -0
- package/web/dist/assets/index-ehlX_Hdw.js +68 -0
- package/web/dist/index.html +1 -1
- package/web/dist/mascot.svg +53 -0
- package/web/dist/assets/index-CD6EJ_1O.js +0 -68
package/README.md
CHANGED
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
[日本語版はこちら](README_JA.md)
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
<p align="center">
|
|
4
|
+
<img src="assets/logo-lockup.svg" alt="shoal" height="72">
|
|
5
|
+
</p>
|
|
4
6
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
<p align="center">
|
|
8
|
+
<a href="https://www.npmjs.com/package/@m8i-51/shoal"><img src="https://img.shields.io/npm/v/@m8i-51/shoal?color=red" alt="npm"></a>
|
|
9
|
+
<a href="https://www.typescriptlang.org/"><img src="https://img.shields.io/badge/TypeScript-5-blue?logo=typescript&logoColor=white" alt="TypeScript"></a>
|
|
10
|
+
<a href="https://playwright.dev/"><img src="https://img.shields.io/badge/Playwright-browser-45ba4b?logo=playwright&logoColor=white" alt="Playwright"></a>
|
|
11
|
+
<a href="https://www.anthropic.com/"><img src="https://img.shields.io/badge/Anthropic-Claude-blueviolet?logo=anthropic&logoColor=white" alt="Anthropic"></a>
|
|
12
|
+
</p>
|
|
9
13
|
|
|
10
|
-
|
|
14
|
+
**AI agents that experience your app — and help it grow.**
|
|
11
15
|
|
|
12
|
-
shoal drops a swarm of agents onto a web app. Each agent has a distinct persona and
|
|
16
|
+
shoal drops a swarm of AI agents onto a web app. Each agent has a distinct persona and explores the app as a real user would — navigating pages, taking actions, noticing friction. They surface bugs, usability issues, missing features, and gaps between what the app does and what it's meant to achieve.
|
|
13
17
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
No test scripts. No test data. No prior knowledge of the app required.
|
|
18
|
+
No test scripts. No test data. No prior knowledge of the app required. Just a URL.
|
|
17
19
|
|
|
18
20
|
---
|
|
19
21
|
|
|
@@ -41,6 +43,21 @@ Target App (any URL)
|
|
|
41
43
|
Triage Agent
|
|
42
44
|
```
|
|
43
45
|
|
|
46
|
+
Each agent carries a distinct perspective — accessibility, security, business logic, UI design, new user experience, and more. They operate on a shared understanding of the app's purpose and goals. Coverage is tracked across runs, so each session naturally focuses on areas that haven't been explored yet.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## What it finds
|
|
51
|
+
|
|
52
|
+
At the end of each run:
|
|
53
|
+
|
|
54
|
+
- **Bugs** — broken flows, errors, inconsistent data
|
|
55
|
+
- **UX issues** — confusing interactions, dead ends, unclear states
|
|
56
|
+
- **Feature suggestions** — things that would add real value
|
|
57
|
+
- **Goal gaps** — where the app falls short of what it's trying to achieve
|
|
58
|
+
|
|
59
|
+
Findings are filed as GitHub Issues or saved as a self-contained HTML report. A **web dashboard** lets you start runs, watch live progress, review findings by category, and track estimated LLM cost per run.
|
|
60
|
+
|
|
44
61
|
---
|
|
45
62
|
|
|
46
63
|
## Quick Start
|
|
@@ -52,7 +69,7 @@ npm install -g @m8i-51/shoal
|
|
|
52
69
|
npx playwright install chromium
|
|
53
70
|
```
|
|
54
71
|
|
|
55
|
-
Move to the project you want to
|
|
72
|
+
Move to the project you want to explore, then run:
|
|
56
73
|
|
|
57
74
|
```bash
|
|
58
75
|
cd your-project
|
|
@@ -63,7 +80,7 @@ Open `.env` and set at minimum:
|
|
|
63
80
|
|
|
64
81
|
```env
|
|
65
82
|
ANTHROPIC_API_KEY=sk-ant-...
|
|
66
|
-
BASE_URL=http://localhost:3000 # URL of the app to
|
|
83
|
+
BASE_URL=http://localhost:3000 # URL of the app to explore
|
|
67
84
|
```
|
|
68
85
|
|
|
69
86
|
Then run:
|
|
@@ -113,6 +130,7 @@ Opens at `http://localhost:4000`. From there you can:
|
|
|
113
130
|
| `ANTHROPIC_API_KEY` | — | Required |
|
|
114
131
|
| `GITHUB_TOKEN` | — | Optional — enables Issue creation |
|
|
115
132
|
| `GITHUB_REPO` | — | `owner/repo` format |
|
|
133
|
+
| `REFRESH_SPEC` | — | Set to `1` to re-run product discovery |
|
|
116
134
|
|
|
117
135
|
---
|
|
118
136
|
|
|
@@ -158,6 +176,23 @@ Alternatively, copy `targets/example.ts`, register it in `targets/index.ts`, and
|
|
|
158
176
|
|
|
159
177
|
---
|
|
160
178
|
|
|
179
|
+
## Scheduled runs
|
|
180
|
+
|
|
181
|
+
To run shoal weekly against a staging environment, add a GitHub Actions workflow to your repo.
|
|
182
|
+
|
|
183
|
+
Run `shoal init` — it will offer to generate `.github/workflows/shoal-weekly.yml` automatically. Or copy the example from this repo:
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
curl -O https://raw.githubusercontent.com/m8i-51/shoal/main/.github/workflows/shoal-weekly.example.yml
|
|
187
|
+
mv shoal-weekly.example.yml .github/workflows/shoal-weekly.yml
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Then add `ANTHROPIC_API_KEY` to your repo's **Actions secrets** (`Settings → Secrets and variables → Actions`).
|
|
191
|
+
|
|
192
|
+
The workflow runs every Monday at 09:00 UTC and can also be triggered manually from the Actions tab. Findings are filed as GitHub Issues using the built-in `GITHUB_TOKEN`.
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
161
196
|
## Account Manager
|
|
162
197
|
|
|
163
198
|
For apps that require login, shoal includes an Account Manager agent that autonomously discovers and tests authentication. It finds login pages, tests credentials from `test-accounts/` (gitignored), and injects session state into explorer agents so they can reach authenticated routes.
|
|
@@ -180,12 +215,15 @@ shoal defaults to Anthropic Claude. To use a different provider, set these varia
|
|
|
180
215
|
| Provider | Variables |
|
|
181
216
|
|---|---|
|
|
182
217
|
| Anthropic (default) | `ANTHROPIC_API_KEY` |
|
|
218
|
+
| Amazon Bedrock | `LLM_PROVIDER=bedrock`, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION` |
|
|
183
219
|
| OpenAI | `LLM_PROVIDER=openai`, `LLM_API_KEY`, `LLM_MODEL` |
|
|
184
220
|
| OpenRouter | `LLM_PROVIDER=openrouter`, `LLM_API_KEY`, `LLM_MODEL` |
|
|
185
221
|
| Codex (ChatGPT subscription) | run `npm run auth:codex` once, then `LLM_PROVIDER=codex` |
|
|
186
222
|
| Ollama | `LLM_BASE_URL=http://localhost:11434/v1`, `LLM_MODEL` |
|
|
187
223
|
| LM Studio | `LLM_BASE_URL=http://localhost:1234/v1`, `LLM_MODEL` |
|
|
188
224
|
|
|
225
|
+
For Bedrock, set `LLM_MODEL` to a Bedrock model ID such as `anthropic.claude-3-5-sonnet-20241022-v2:0`. Cross-region inference profiles (e.g. `us.anthropic.claude-3-5-sonnet-20241022-v2:0`) are also supported.
|
|
226
|
+
|
|
189
227
|
See `.env.example` for full examples.
|
|
190
228
|
|
|
191
229
|
---
|
package/bin/init.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import { intro, outro, select, text, isCancel, cancel } from "@clack/prompts";
|
|
2
|
-
import { writeFileSync, existsSync } from "fs";
|
|
1
|
+
import { intro, outro, select, text, confirm, isCancel, cancel } from "@clack/prompts";
|
|
2
|
+
import { writeFileSync, existsSync, mkdirSync } from "fs";
|
|
3
3
|
import { join } from "path";
|
|
4
4
|
|
|
5
5
|
const PROVIDERS = [
|
|
6
6
|
{ value: "anthropic", label: "Anthropic (Claude)", hint: "recommended", defaultModel: "claude-haiku-4-5-20251001" },
|
|
7
|
+
{ value: "bedrock", label: "Amazon Bedrock", hint: "AWS credentials", defaultModel: "anthropic.claude-3-5-haiku-20241022-v1:0" },
|
|
7
8
|
{ value: "openai", label: "OpenAI", defaultModel: "gpt-4o-mini" },
|
|
8
9
|
{ value: "groq", label: "Groq", hint: "free tier available", defaultModel: "llama-3.3-70b-versatile" },
|
|
9
10
|
{ value: "gemini", label: "Gemini", hint: "free tier available", defaultModel: "gemini-2.0-flash" },
|
|
@@ -47,6 +48,22 @@ export async function runInit(cwd) {
|
|
|
47
48
|
placeholder: "sk-ant-...",
|
|
48
49
|
validate: (v) => v?.trim() ? undefined : "Required",
|
|
49
50
|
}));
|
|
51
|
+
} else if (provider === "bedrock") {
|
|
52
|
+
env.LLM_PROVIDER = "bedrock";
|
|
53
|
+
env.AWS_ACCESS_KEY_ID = guard(await text({
|
|
54
|
+
message: "AWS_ACCESS_KEY_ID",
|
|
55
|
+
placeholder: "AKIA...",
|
|
56
|
+
validate: (v) => v?.trim() ? undefined : "Required",
|
|
57
|
+
}));
|
|
58
|
+
env.AWS_SECRET_ACCESS_KEY = guard(await text({
|
|
59
|
+
message: "AWS_SECRET_ACCESS_KEY",
|
|
60
|
+
placeholder: "...",
|
|
61
|
+
validate: (v) => v?.trim() ? undefined : "Required",
|
|
62
|
+
}));
|
|
63
|
+
env.AWS_REGION = guard(await text({
|
|
64
|
+
message: "AWS region",
|
|
65
|
+
defaultValue: "us-east-1",
|
|
66
|
+
}));
|
|
50
67
|
} else if (provider === "ollama") {
|
|
51
68
|
env.LLM_PROVIDER = "ollama";
|
|
52
69
|
const baseUrl = guard(await text({
|
|
@@ -102,5 +119,68 @@ export async function runInit(cwd) {
|
|
|
102
119
|
const lines = Object.entries(env).map(([k, v]) => `${k}=${v}`);
|
|
103
120
|
writeFileSync(envPath, lines.join("\n") + "\n", "utf-8");
|
|
104
121
|
|
|
122
|
+
// ── GitHub Actions workflow (optional) ────────────────────────────
|
|
123
|
+
const wantsWorkflow = guard(await confirm({
|
|
124
|
+
message: "Generate a GitHub Actions workflow for weekly scheduled runs?",
|
|
125
|
+
initialValue: false,
|
|
126
|
+
}));
|
|
127
|
+
|
|
128
|
+
if (wantsWorkflow) {
|
|
129
|
+
const stagingUrl = guard(await text({
|
|
130
|
+
message: "Staging URL (used as BASE_URL in the workflow)",
|
|
131
|
+
placeholder: "https://staging.example.com",
|
|
132
|
+
validate: (v) => v?.trim() ? undefined : "Required",
|
|
133
|
+
}));
|
|
134
|
+
|
|
135
|
+
const workflowDir = join(cwd, ".github", "workflows");
|
|
136
|
+
const workflowPath = join(workflowDir, "shoal-weekly.yml");
|
|
137
|
+
mkdirSync(workflowDir, { recursive: true });
|
|
138
|
+
writeFileSync(workflowPath, `# shoal weekly run
|
|
139
|
+
#
|
|
140
|
+
# Required secrets: ANTHROPIC_API_KEY
|
|
141
|
+
# Required variables: STAGING_URL is hardcoded below — update as needed
|
|
142
|
+
#
|
|
143
|
+
# GitHub Issues are filed automatically using the built-in GITHUB_TOKEN.
|
|
144
|
+
|
|
145
|
+
name: shoal weekly run
|
|
146
|
+
|
|
147
|
+
on:
|
|
148
|
+
schedule:
|
|
149
|
+
- cron: '0 9 * * 1' # every Monday at 09:00 UTC
|
|
150
|
+
workflow_dispatch: # also allow manual trigger from the Actions tab
|
|
151
|
+
|
|
152
|
+
jobs:
|
|
153
|
+
shoal:
|
|
154
|
+
runs-on: ubuntu-latest
|
|
155
|
+
timeout-minutes: 60
|
|
156
|
+
|
|
157
|
+
steps:
|
|
158
|
+
- uses: actions/checkout@v4
|
|
159
|
+
|
|
160
|
+
- uses: actions/setup-node@v4
|
|
161
|
+
with:
|
|
162
|
+
node-version: '20'
|
|
163
|
+
|
|
164
|
+
- name: Install shoal
|
|
165
|
+
run: npm install -g @m8i-51/shoal
|
|
166
|
+
|
|
167
|
+
- name: Install Playwright browsers
|
|
168
|
+
run: npx playwright install chromium --with-deps
|
|
169
|
+
|
|
170
|
+
- name: Run shoal
|
|
171
|
+
env:
|
|
172
|
+
ANTHROPIC_API_KEY: \${{ secrets.ANTHROPIC_API_KEY }}
|
|
173
|
+
BASE_URL: ${stagingUrl.trim()}
|
|
174
|
+
GITHUB_TOKEN: \${{ secrets.GITHUB_TOKEN }}
|
|
175
|
+
GITHUB_REPO: \${{ github.repository }}
|
|
176
|
+
MAX_BROWSERS: '2'
|
|
177
|
+
MAX_EXPLORERS: '0'
|
|
178
|
+
run: shoal
|
|
179
|
+
`, "utf-8");
|
|
180
|
+
|
|
181
|
+
console.log(`\n Created ${workflowPath}`);
|
|
182
|
+
console.log(" Next: add ANTHROPIC_API_KEY to your repo's Actions secrets");
|
|
183
|
+
}
|
|
184
|
+
|
|
105
185
|
outro("Created .env\n\n shoal serve — open the dashboard at http://localhost:4000\n shoal — run agents from the terminal");
|
|
106
186
|
}
|
package/framework/cost.ts
CHANGED
|
@@ -9,6 +9,14 @@ const ANTHROPIC_PRICING: Record<string, { input: number; output: number }> = {
|
|
|
9
9
|
"claude-3-opus-20240229": { input: 15 / 1e6, output: 75 / 1e6 },
|
|
10
10
|
};
|
|
11
11
|
|
|
12
|
+
// Bedrock on-demand pricing (us-east-1, as of 2026-04)
|
|
13
|
+
const BEDROCK_PRICING: Record<string, { input: number; output: number }> = {
|
|
14
|
+
"anthropic.claude-3-5-sonnet-20241022-v2:0": { input: 3 / 1e6, output: 15 / 1e6 },
|
|
15
|
+
"anthropic.claude-3-5-haiku-20241022-v1:0": { input: 0.8 / 1e6, output: 4 / 1e6 },
|
|
16
|
+
"anthropic.claude-3-opus-20240229-v1:0": { input: 15 / 1e6, output: 75 / 1e6 },
|
|
17
|
+
"anthropic.claude-3-haiku-20240307-v1:0": { input: 0.25 / 1e6, output: 1.25 / 1e6 },
|
|
18
|
+
};
|
|
19
|
+
|
|
12
20
|
const OPENAI_PRICING: Record<string, { input: number; output: number }> = {
|
|
13
21
|
"gpt-4o": { input: 5 / 1e6, output: 15 / 1e6 },
|
|
14
22
|
"gpt-4o-mini": { input: 0.15 / 1e6, output: 0.6 / 1e6 },
|
|
@@ -71,6 +79,13 @@ export async function estimateCost(
|
|
|
71
79
|
const key = Object.keys(ANTHROPIC_PRICING).find((k) => model.startsWith(k));
|
|
72
80
|
if (key) pricing = ANTHROPIC_PRICING[key];
|
|
73
81
|
}
|
|
82
|
+
} else if (provider === "bedrock") {
|
|
83
|
+
pricing = BEDROCK_PRICING[model];
|
|
84
|
+
if (!pricing) {
|
|
85
|
+
// cross-region prefix (e.g. "us.anthropic.claude-..." → strip prefix)
|
|
86
|
+
const stripped = model.replace(/^[a-z]{2}\./, "");
|
|
87
|
+
pricing = BEDROCK_PRICING[stripped];
|
|
88
|
+
}
|
|
74
89
|
} else if (provider === "openai") {
|
|
75
90
|
pricing = OPENAI_PRICING[model];
|
|
76
91
|
} else if (provider === "openrouter") {
|
package/framework/llm-client.ts
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import Anthropic from "@anthropic-ai/sdk";
|
|
13
|
+
import AnthropicBedrock from "@anthropic-ai/bedrock-sdk";
|
|
13
14
|
import OpenAI from "openai";
|
|
14
15
|
import * as fs from "fs";
|
|
15
16
|
import * as os from "os";
|
|
@@ -46,6 +47,21 @@ class AnthropicClient {
|
|
|
46
47
|
}
|
|
47
48
|
}
|
|
48
49
|
|
|
50
|
+
// ---- Amazon Bedrock client ----
|
|
51
|
+
|
|
52
|
+
class BedrockClient {
|
|
53
|
+
private client: AnthropicBedrock;
|
|
54
|
+
|
|
55
|
+
constructor() {
|
|
56
|
+
// AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_REGION are read automatically
|
|
57
|
+
this.client = new AnthropicBedrock();
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
async createMessage(params: CreateMessageParams): Promise<Message> {
|
|
61
|
+
return this.client.messages.create(params) as Promise<Message>;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
49
65
|
// ---- OpenAI-compatible client ----
|
|
50
66
|
|
|
51
67
|
function toOpenAITools(tools: Tool[]): OpenAI.ChatCompletionTool[] {
|
|
@@ -452,7 +468,7 @@ class CodexClient {
|
|
|
452
468
|
|
|
453
469
|
// ---- Factory ----
|
|
454
470
|
|
|
455
|
-
export type LLMClient = AnthropicClient | OpenAICompatClient | CodexClient;
|
|
471
|
+
export type LLMClient = AnthropicClient | BedrockClient | OpenAICompatClient | CodexClient;
|
|
456
472
|
|
|
457
473
|
// Default settings for OpenAI-compat providers
|
|
458
474
|
// Can be overridden individually via LLM_BASE_URL / LLM_MODEL
|
|
@@ -470,6 +486,17 @@ export function createLLMClient(): { client: LLMClient; defaultModel: string; pr
|
|
|
470
486
|
const baseURL = process.env.LLM_BASE_URL;
|
|
471
487
|
const model = process.env.LLM_MODEL;
|
|
472
488
|
|
|
489
|
+
// Bedrock
|
|
490
|
+
if (provider === "bedrock") {
|
|
491
|
+
const effectiveModel = model ?? "anthropic.claude-3-5-haiku-20241022-v1:0";
|
|
492
|
+
console.log(`[LLM] provider: Amazon Bedrock (region: ${process.env.AWS_REGION ?? "us-east-1"}), model: ${effectiveModel}`);
|
|
493
|
+
return {
|
|
494
|
+
client: new BedrockClient(),
|
|
495
|
+
defaultModel: effectiveModel,
|
|
496
|
+
provider: "bedrock",
|
|
497
|
+
};
|
|
498
|
+
}
|
|
499
|
+
|
|
473
500
|
// Codex uses its own dedicated client
|
|
474
501
|
if (provider === "codex") {
|
|
475
502
|
const effectiveModel = model ?? "gpt-5.1-codex-mini";
|
package/framework/report.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as fs from "fs";
|
|
2
2
|
import * as path from "path";
|
|
3
|
-
import type { Finding, RunLog } from "./types";
|
|
3
|
+
import type { Finding, RunLog, RegressionCheck } from "./types";
|
|
4
4
|
import type { ProductSpec } from "./product-discovery";
|
|
5
5
|
import type { TriageResult } from "./triage";
|
|
6
6
|
import type { Scenario, ScenarioOutcome } from "./scenario-designer";
|
|
@@ -51,6 +51,10 @@ export function generateReport(
|
|
|
51
51
|
): string {
|
|
52
52
|
const reportPath = path.join(process.cwd(), "logs", `report_${runLog.runId}.html`);
|
|
53
53
|
|
|
54
|
+
const allRegressionChecks: RegressionCheck[] = runLog.agents.flatMap((a) => a.regressionChecks ?? []);
|
|
55
|
+
const fixedChecks = allRegressionChecks.filter((c) => c.status === "fixed");
|
|
56
|
+
const regressedChecks = allRegressionChecks.filter((c) => c.status === "regressed");
|
|
57
|
+
|
|
54
58
|
const issuedSet = new Set(triageResult.issued);
|
|
55
59
|
const skippedSet = new Set(triageResult.skipped);
|
|
56
60
|
|
|
@@ -242,10 +246,28 @@ export function generateReport(
|
|
|
242
246
|
<div class="stat-card"><div class="number">${triageResult.skipped.length}</div><div class="label">skipped</div></div>
|
|
243
247
|
<div class="stat-card"><div class="number">${triageResult.unprocessed.length}</div><div class="label">pending</div></div>
|
|
244
248
|
<div class="stat-card"><div class="number">${runLog.agents.length}</div><div class="label">agents</div></div>
|
|
249
|
+
${allRegressionChecks.length > 0 ? `<div class="stat-card"><div class="number" style="color:#22c55e">${fixedChecks.length}</div><div class="label">still fixed</div></div><div class="stat-card"><div class="number" style="color:${regressedChecks.length > 0 ? "#ef4444" : "#94a3b8"}">${regressedChecks.length}</div><div class="label">regressed</div></div>` : ""}
|
|
245
250
|
</div>
|
|
246
251
|
<div class="category-bar">${categoryBar || '<div style="width:100%;display:flex;align-items:center;padding:0 .75rem;font-size:.75rem;color:#94a3b8">no findings</div>'}</div>
|
|
247
252
|
</section>
|
|
248
253
|
|
|
254
|
+
${allRegressionChecks.length > 0 ? `
|
|
255
|
+
<section>
|
|
256
|
+
<h2>Progress (${allRegressionChecks.length} issues checked)</h2>
|
|
257
|
+
${regressedChecks.length > 0 ? `<p style="color:#ef4444;font-size:.875rem;margin-bottom:.75rem">⚠ ${regressedChecks.length} regression${regressedChecks.length !== 1 ? "s" : ""} detected</p>` : `<p style="color:#22c55e;font-size:.875rem;margin-bottom:.75rem">✓ All previously fixed issues remain resolved</p>`}
|
|
258
|
+
<table>
|
|
259
|
+
<thead><tr><th>#</th><th>Issue</th><th style="text-align:center">Status</th></tr></thead>
|
|
260
|
+
<tbody>
|
|
261
|
+
${allRegressionChecks.map((c) => `
|
|
262
|
+
<tr>
|
|
263
|
+
<td style="color:#94a3b8">#${c.issueNumber}</td>
|
|
264
|
+
<td>${esc(c.issueTitle)}</td>
|
|
265
|
+
<td style="text-align:center">${c.status === "fixed" ? '<span class="badge" style="background:#22c55e">✓ fixed</span>' : '<span class="badge" style="background:#ef4444">⚠ regressed</span>'}</td>
|
|
266
|
+
</tr>`).join("")}
|
|
267
|
+
</tbody>
|
|
268
|
+
</table>
|
|
269
|
+
</section>` : ""}
|
|
270
|
+
|
|
249
271
|
<section>
|
|
250
272
|
<h2>Findings (${findings.length})</h2>
|
|
251
273
|
${sortedFindings.length > 0 ? findingCards : "<p style='color:#94a3b8;font-size:.875rem'>No findings collected.</p>"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@m8i-51/shoal",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Multi-agent web exploration framework — finds bugs, UX issues, and missing features by running AI agents against your app",
|
|
6
6
|
"repository": {
|
|
@@ -34,6 +34,7 @@
|
|
|
34
34
|
"test:watch": "vitest"
|
|
35
35
|
},
|
|
36
36
|
"dependencies": {
|
|
37
|
+
"@anthropic-ai/bedrock-sdk": "^0.29.1",
|
|
37
38
|
"@anthropic-ai/sdk": "^0.91.1",
|
|
38
39
|
"@clack/prompts": "^1.3.0",
|
|
39
40
|
"dotenv": "^17.3.1",
|
package/run.ts
CHANGED
|
@@ -43,6 +43,7 @@ import { estimateCost, formatCostUSD } from "./framework/cost";
|
|
|
43
43
|
const BASE_URL = process.env.BASE_URL ?? "http://localhost:3000";
|
|
44
44
|
const GITHUB_TOKEN = process.env.GITHUB_TOKEN ?? "";
|
|
45
45
|
const GITHUB_REPO = process.env.GITHUB_REPO ?? "";
|
|
46
|
+
const REFRESH_SPEC = process.env.REFRESH_SPEC === "1";
|
|
46
47
|
const githubOptions = { token: GITHUB_TOKEN, repo: GITHUB_REPO };
|
|
47
48
|
|
|
48
49
|
const TARGET = process.env.TARGET ?? "none";
|
|
@@ -110,7 +111,14 @@ const POST_FEEDBACK_TOOL: Tool = {
|
|
|
110
111
|
type: "object",
|
|
111
112
|
properties: {
|
|
112
113
|
title: { type: "string" },
|
|
113
|
-
body: {
|
|
114
|
+
body: {
|
|
115
|
+
type: "string",
|
|
116
|
+
description: `Describe the finding. Tone varies by category:
|
|
117
|
+
- bug: technical — state what happened, what was expected, and steps to reproduce.
|
|
118
|
+
- ux: experiential — write from the user's perspective ("I tried to...", "It was hard to find...", "I got confused when...").
|
|
119
|
+
- feature-request: aspirational — describe what you wished you could do ("It would have been helpful if...", "I wanted to...").
|
|
120
|
+
- goal-gap: goal-oriented — explain which goal was blocked and why ("I was trying to achieve X, but couldn't because...").`,
|
|
121
|
+
},
|
|
114
122
|
category: { type: "string", enum: ["ux", "feature-request", "bug", "goal-gap"] },
|
|
115
123
|
},
|
|
116
124
|
required: ["title", "body", "category"],
|
|
@@ -317,6 +325,12 @@ ${productSpec.appDescription}
|
|
|
317
325
|
If you notice anything inconvenient, a missing feature, or bug-like behavior,
|
|
318
326
|
report it with the post_feedback tool.
|
|
319
327
|
|
|
328
|
+
When writing the body, match the tone to the category:
|
|
329
|
+
- bug: technical ("The endpoint returned 500 when...", "Expected X but got Y")
|
|
330
|
+
- ux: experiential ("I tried to find the button but...", "It was unclear what would happen if...")
|
|
331
|
+
- feature-request: aspirational ("It would have been useful if...", "I wished I could...")
|
|
332
|
+
- goal-gap: goal-oriented ("I was trying to X, but couldn't because...")
|
|
333
|
+
|
|
320
334
|
[Implemented Features]
|
|
321
335
|
${productSpec.features}
|
|
322
336
|
${productSpec.uiFeatures ? `\n[UI-Only Features]\nThese features exist in the UI but may not be reflected in API responses. Keep them in mind when interpreting API results.\n${productSpec.uiFeatures}\n` : ""}${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\n` : ""}${goalsSection(productSpec)}${assignment.scenario
|
|
@@ -882,6 +896,12 @@ ${productSpec.appDescription}
|
|
|
882
896
|
4. Move to another page and repeat
|
|
883
897
|
5. Finish after 8–10 actions
|
|
884
898
|
|
|
899
|
+
When writing the body, match the tone to the category:
|
|
900
|
+
- bug: technical ("The endpoint returned 500 when...", "Expected X but got Y")
|
|
901
|
+
- ux: experiential ("I tried to find the button but...", "It was unclear what would happen if...")
|
|
902
|
+
- feature-request: aspirational ("It would have been useful if...", "I wished I could...")
|
|
903
|
+
- goal-gap: goal-oriented ("I was trying to X, but couldn't because...")
|
|
904
|
+
|
|
885
905
|
[Using Observation Tools]
|
|
886
906
|
- To verify an action was actually applied, call diff_since_last_action
|
|
887
907
|
- If data isn't reflected or errors appear, call read_network_errors
|
|
@@ -1041,8 +1061,13 @@ async function main() {
|
|
|
1041
1061
|
const scenarioOutcomes: ScenarioOutcome[] = [];
|
|
1042
1062
|
try {
|
|
1043
1063
|
const cached = loadCachedSpec(BASE_URL);
|
|
1044
|
-
if (cached) {
|
|
1045
|
-
|
|
1064
|
+
if (cached && !REFRESH_SPEC) {
|
|
1065
|
+
const ageDays = cached.discoveredAt
|
|
1066
|
+
? Math.floor((Date.now() - new Date(cached.discoveredAt).getTime()) / 86_400_000)
|
|
1067
|
+
: null;
|
|
1068
|
+
const ageStr = ageDays != null ? `${ageDays} day${ageDays !== 1 ? "s" : ""} old` : "unknown date";
|
|
1069
|
+
const staleHint = ageDays != null && ageDays >= 7 ? " — set REFRESH_SPEC=1 to re-run discovery" : "";
|
|
1070
|
+
console.log(`\n[product-discovery] using cache (${ageStr}, confidence: ${cached.confidence})${staleHint}`);
|
|
1046
1071
|
productSpec = cached;
|
|
1047
1072
|
} else {
|
|
1048
1073
|
const discoveryContext = await browser.newContext({ viewport: { width: 1024, height: 640 } });
|
package/server/index.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { dirname, join, resolve } from "path";
|
|
|
6
6
|
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
7
7
|
import { listRuns, getReportPath } from "./runs.js";
|
|
8
8
|
import { activeSessions, spawnRun, cancelSession } from "./runner.js";
|
|
9
|
+
import { loadSchedule, saveSchedule, startScheduler, type ScheduleConfig } from "./scheduler.js";
|
|
9
10
|
|
|
10
11
|
function specFilePath(baseUrl: string): string {
|
|
11
12
|
try {
|
|
@@ -248,6 +249,28 @@ process.on("unhandledRejection", (reason) => {
|
|
|
248
249
|
console.error("[server] unhandledRejection:", reason);
|
|
249
250
|
});
|
|
250
251
|
|
|
252
|
+
// ----------------------------------------------------------------
|
|
253
|
+
// API: schedule config
|
|
254
|
+
// ----------------------------------------------------------------
|
|
255
|
+
app.get("/api/schedule", (_req, res) => {
|
|
256
|
+
res.json(loadSchedule());
|
|
257
|
+
});
|
|
258
|
+
|
|
259
|
+
app.patch("/api/schedule", (req, res) => {
|
|
260
|
+
const current = loadSchedule();
|
|
261
|
+
const { enabled, dayOfWeek, hour, minute } = req.body as Partial<ScheduleConfig>;
|
|
262
|
+
const updated: ScheduleConfig = {
|
|
263
|
+
...current,
|
|
264
|
+
...(enabled != null ? { enabled: Boolean(enabled) } : {}),
|
|
265
|
+
...(dayOfWeek != null && Number.isInteger(dayOfWeek) && dayOfWeek >= 0 && dayOfWeek <= 6 ? { dayOfWeek } : {}),
|
|
266
|
+
...(hour != null && Number.isInteger(hour) && hour >= 0 && hour <= 23 ? { hour } : {}),
|
|
267
|
+
...(minute != null && Number.isInteger(minute) && minute >= 0 && minute <= 59 ? { minute } : {}),
|
|
268
|
+
};
|
|
269
|
+
saveSchedule(updated);
|
|
270
|
+
res.json(updated);
|
|
271
|
+
});
|
|
272
|
+
|
|
251
273
|
app.listen(PORT, () => {
|
|
252
274
|
console.log(`\nshoal dashboard → http://localhost:${PORT}\n`);
|
|
275
|
+
startScheduler();
|
|
253
276
|
});
|
package/server/runs.ts
CHANGED
|
@@ -15,6 +15,8 @@ export interface RunSummary {
|
|
|
15
15
|
hasReport: boolean;
|
|
16
16
|
isLive?: boolean;
|
|
17
17
|
estimatedCostUSD: number | null;
|
|
18
|
+
regressionChecked: number;
|
|
19
|
+
regressionFailed: number;
|
|
18
20
|
}
|
|
19
21
|
|
|
20
22
|
function countFindings(runId: string): { total: number; byCategory: Record<string, number> } {
|
|
@@ -63,6 +65,8 @@ export function listRuns(): RunSummary[] {
|
|
|
63
65
|
hasReport: false,
|
|
64
66
|
isLive: true,
|
|
65
67
|
estimatedCostUSD: null,
|
|
68
|
+
regressionChecked: 0,
|
|
69
|
+
regressionFailed: 0,
|
|
66
70
|
});
|
|
67
71
|
}
|
|
68
72
|
} catch { /* skip */ }
|
|
@@ -90,6 +94,8 @@ export function listRuns(): RunSummary[] {
|
|
|
90
94
|
findingsByCategory: byCategory,
|
|
91
95
|
hasReport: fs.existsSync(reportPath),
|
|
92
96
|
estimatedCostUSD: log.summary?.cost?.estimatedUSD ?? null,
|
|
97
|
+
regressionChecked: log.summary?.regressionChecked ?? 0,
|
|
98
|
+
regressionFailed: log.summary?.regressionFailed ?? 0,
|
|
93
99
|
});
|
|
94
100
|
} catch { /* skip */ }
|
|
95
101
|
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
import { spawnRun } from "./runner.js";
|
|
4
|
+
|
|
5
|
+
export interface ScheduleConfig {
|
|
6
|
+
enabled: boolean;
|
|
7
|
+
dayOfWeek: number; // 0=Sun 1=Mon ... 6=Sat
|
|
8
|
+
hour: number;
|
|
9
|
+
minute: number;
|
|
10
|
+
lastRunDate: string | null; // YYYY-MM-DD — prevents double-trigger
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
const DEFAULT_CONFIG: ScheduleConfig = {
|
|
14
|
+
enabled: false,
|
|
15
|
+
dayOfWeek: 1,
|
|
16
|
+
hour: 9,
|
|
17
|
+
minute: 0,
|
|
18
|
+
lastRunDate: null,
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
function configPath(): string {
|
|
22
|
+
return join(process.cwd(), "schedule.json");
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export function loadSchedule(): ScheduleConfig {
|
|
26
|
+
const p = configPath();
|
|
27
|
+
if (!existsSync(p)) return { ...DEFAULT_CONFIG };
|
|
28
|
+
try {
|
|
29
|
+
return { ...DEFAULT_CONFIG, ...JSON.parse(readFileSync(p, "utf-8")) };
|
|
30
|
+
} catch {
|
|
31
|
+
return { ...DEFAULT_CONFIG };
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function saveSchedule(config: ScheduleConfig): void {
|
|
36
|
+
writeFileSync(configPath(), JSON.stringify(config, null, 2), "utf-8");
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
export function startScheduler(): void {
|
|
40
|
+
const check = () => {
|
|
41
|
+
const config = loadSchedule();
|
|
42
|
+
if (!config.enabled) return;
|
|
43
|
+
|
|
44
|
+
const now = new Date();
|
|
45
|
+
const today = now.toISOString().slice(0, 10);
|
|
46
|
+
|
|
47
|
+
// Match within a short window after the target minute (diff of 0 or 1) to absorb interval drift
|
|
48
|
+
const nowMin = now.getDay() * 1440 + now.getHours() * 60 + now.getMinutes();
|
|
49
|
+
const targetMin = config.dayOfWeek * 1440 + config.hour * 60 + config.minute;
|
|
50
|
+
const diff = nowMin - targetMin;
|
|
51
|
+
|
|
52
|
+
if (diff >= 0 && diff < 2 && config.lastRunDate !== today) {
|
|
53
|
+
console.log(`[scheduler] triggering scheduled run (${today})`);
|
|
54
|
+
spawnRun({});
|
|
55
|
+
saveSchedule({ ...config, lastRunDate: today });
|
|
56
|
+
}
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
// Align to the start of the next minute, then check every minute
|
|
60
|
+
const msToNextMinute = 60_000 - (Date.now() % 60_000);
|
|
61
|
+
setTimeout(() => {
|
|
62
|
+
check();
|
|
63
|
+
setInterval(check, 60_000);
|
|
64
|
+
}, msToNextMinute);
|
|
65
|
+
}
|