@kestrel-agents/ruhroh 0.5.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +114 -0
- package/assets/ruhroh-badge.png +0 -0
- package/assets/ruhroh-logo.png +0 -0
- package/dist/adapters.d.ts +97 -0
- package/dist/adapters.d.ts.map +1 -0
- package/dist/adapters.js +21 -0
- package/dist/adapters.js.map +1 -0
- package/dist/builtin-scenarios.d.ts +8 -0
- package/dist/builtin-scenarios.d.ts.map +1 -0
- package/dist/builtin-scenarios.js +22 -0
- package/dist/builtin-scenarios.js.map +1 -0
- package/dist/cli.d.ts +30 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +313 -0
- package/dist/cli.js.map +1 -0
- package/dist/env.d.ts +6 -0
- package/dist/env.d.ts.map +1 -0
- package/dist/env.js +66 -0
- package/dist/env.js.map +1 -0
- package/dist/generate.d.ts +32 -0
- package/dist/generate.d.ts.map +1 -0
- package/dist/generate.js +231 -0
- package/dist/generate.js.map +1 -0
- package/dist/harbor.d.ts +28 -0
- package/dist/harbor.d.ts.map +1 -0
- package/dist/harbor.js +47 -0
- package/dist/harbor.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/results.d.ts +66 -0
- package/dist/results.d.ts.map +1 -0
- package/dist/results.js +31 -0
- package/dist/results.js.map +1 -0
- package/dist/scenarios.d.ts +61 -0
- package/dist/scenarios.d.ts.map +1 -0
- package/dist/scenarios.js +69 -0
- package/dist/scenarios.js.map +1 -0
- package/package.json +66 -0
- package/python/ruhroh/__init__.py +5 -0
- package/python/ruhroh/harbor_agent.py +345 -0
- package/python/ruhroh/loop_controller.py +783 -0
- package/python/ruhroh/setup.sh +12 -0
- package/scenarios/grocery-budget-planner/instruction.md +1 -0
- package/scenarios/grocery-budget-planner/scenario.json +44 -0
- package/scenarios/nextjs-task-board/instruction.md +1 -0
- package/scenarios/nextjs-task-board/scenario.json +45 -0
- package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/coverage-rules.json +29 -0
- package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/employees.csv +8 -0
- package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/existing-schedule.csv +9 -0
- package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/shift-requirements.csv +8 -0
- package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/time-off-requests.csv +5 -0
- package/scenarios/shift-coverage-planner/instruction.md +1 -0
- package/scenarios/shift-coverage-planner/scenario.json +47 -0
- package/scenarios/simple-newsletter/instruction.md +1 -0
- package/scenarios/simple-newsletter/scenario.json +40 -0
- package/scenarios/vite-csv-reconciliation/assets/prompt-assets/csv-reconciliation-people/source-a.csv +9 -0
- package/scenarios/vite-csv-reconciliation/assets/prompt-assets/csv-reconciliation-people/source-b.csv +9 -0
- package/scenarios/vite-csv-reconciliation/instruction.md +1 -0
- package/scenarios/vite-csv-reconciliation/scenario.json +48 -0
- package/scenarios/vite-sprint-planner/instruction.md +1 -0
- package/scenarios/vite-sprint-planner/scenario.json +45 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
export DEBIAN_FRONTEND=noninteractive
|
|
5
|
+
|
|
6
|
+
if command -v apt-get >/dev/null 2>&1; then
|
|
7
|
+
apt-get update
|
|
8
|
+
apt-get install -y --no-install-recommends bash ca-certificates python3
|
|
9
|
+
rm -rf /var/lib/apt/lists/*
|
|
10
|
+
fi
|
|
11
|
+
|
|
12
|
+
chmod +x /installed-agent/ruhroh_loop_controller.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Build me a local grocery budget planner for a weekend dinner. It should work in this empty folder with no dependencies. The app should let me add grocery items with a name, category, estimated price, and whether it is already purchased. It should show the grocery list grouped by category, let me mark items purchased, and update the remaining budget and purchased/unpurchased totals as the list changes. Seed it with a realistic starter list so I can use it immediately, and remember my changes locally when I reload the page. Before you finish, check that adding an item, marking an item purchased, and the totals are wired together.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "ruhroh_scenario_v2",
|
|
3
|
+
"id": "grocery-budget-planner",
|
|
4
|
+
"title": "Grocery Budget Planner",
|
|
5
|
+
"tier": "nightly",
|
|
6
|
+
"kind": "real_user",
|
|
7
|
+
"userPromptPath": "instruction.md",
|
|
8
|
+
"run": {
|
|
9
|
+
"mode": "build",
|
|
10
|
+
"timeoutSeconds": 600
|
|
11
|
+
},
|
|
12
|
+
"requires": {
|
|
13
|
+
"continuity": "workspace_plus_transcript",
|
|
14
|
+
"tools": [
|
|
15
|
+
"filesystem",
|
|
16
|
+
"shell"
|
|
17
|
+
],
|
|
18
|
+
"network": false
|
|
19
|
+
},
|
|
20
|
+
"loop": {
|
|
21
|
+
"defaultMaxIterations": 3,
|
|
22
|
+
"stopPolicy": "goal_satisfied_or_max"
|
|
23
|
+
},
|
|
24
|
+
"evaluation": {
|
|
25
|
+
"mode": "agentic_goal_review",
|
|
26
|
+
"scenarioContext": [
|
|
27
|
+
"This is a dependency-free local app-development task.",
|
|
28
|
+
"The user asked for an immediately usable grocery budget planner, not a prose plan.",
|
|
29
|
+
"The user did not prescribe a framework or file layout."
|
|
30
|
+
],
|
|
31
|
+
"goalRubric": [
|
|
32
|
+
"The final workspace provides a locally usable grocery budget planner.",
|
|
33
|
+
"The app supports adding grocery items with name, category, estimated price, and purchased state.",
|
|
34
|
+
"The app groups or clearly organizes items by category.",
|
|
35
|
+
"Purchased and unpurchased totals plus remaining budget update when the list changes.",
|
|
36
|
+
"Starter dinner grocery data is present and local persistence survives reloads."
|
|
37
|
+
],
|
|
38
|
+
"evidenceGuidance": [
|
|
39
|
+
"Inspect the final workspace and run or open the local app when practical.",
|
|
40
|
+
"Exercise the user workflow: add an item, mark an item purchased, and verify totals update.",
|
|
41
|
+
"Do not require a specific filename or framework unless the delivered app itself documents that as its local run path."
|
|
42
|
+
]
|
|
43
|
+
}
|
|
44
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Build a small local full-stack Next.js task board app with file-backed persistence. It should support creating a task, editing a task, archiving a task, validation for empty input, and an empty-state view. The app should expose a clear task management UI with an add-task form, a task list, and controls that prove edit/archive behavior is wired. Use a local Next.js API or server route backed by local files under the project workspace rather than a client-only store, external database, or hosted service. Before you finish, make sure the app builds successfully and run practical local quality checks such as linting or typechecking when the scaffold supports them. Finally, check the app like a user would: create a task, edit it, archive it, confirm validation for empty input, and confirm persisted data still makes sense after reload or restart.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "ruhroh_scenario_v2",
|
|
3
|
+
"id": "nextjs-task-board",
|
|
4
|
+
"title": "Next.js Task Board",
|
|
5
|
+
"tier": "nightly",
|
|
6
|
+
"kind": "real_user",
|
|
7
|
+
"userPromptPath": "instruction.md",
|
|
8
|
+
"run": {
|
|
9
|
+
"mode": "build",
|
|
10
|
+
"timeoutSeconds": 900
|
|
11
|
+
},
|
|
12
|
+
"requires": {
|
|
13
|
+
"continuity": "workspace_plus_transcript",
|
|
14
|
+
"tools": [
|
|
15
|
+
"filesystem",
|
|
16
|
+
"shell"
|
|
17
|
+
],
|
|
18
|
+
"network": true
|
|
19
|
+
},
|
|
20
|
+
"loop": {
|
|
21
|
+
"defaultMaxIterations": 3,
|
|
22
|
+
"stopPolicy": "goal_satisfied_or_max"
|
|
23
|
+
},
|
|
24
|
+
"evaluation": {
|
|
25
|
+
"mode": "agentic_goal_review",
|
|
26
|
+
"scenarioContext": [
|
|
27
|
+
"This is a local full-stack Next.js app-development task.",
|
|
28
|
+
"The scenario is inspired by Mountaintop's task-board case, but Ruhroh should judge the delivered user workflow rather than static required artifact names.",
|
|
29
|
+
"External databases and hosted services are out of scope; persistence should be local to the workspace."
|
|
30
|
+
],
|
|
31
|
+
"goalRubric": [
|
|
32
|
+
"The final workspace provides a local full-stack Next.js task board app.",
|
|
33
|
+
"The app supports creating, editing, and archiving tasks through a visible UI.",
|
|
34
|
+
"The app validates empty input and has a sensible empty state.",
|
|
35
|
+
"Task data persists through a local API or server route backed by workspace-local file storage, not only browser localStorage.",
|
|
36
|
+
"The app builds successfully and avoids obvious scaffold/runtime errors.",
|
|
37
|
+
"A reviewer can run the app locally and verify the task workflow."
|
|
38
|
+
],
|
|
39
|
+
"evidenceGuidance": [
|
|
40
|
+
"Run install/build/start commands in the eval workspace when practical.",
|
|
41
|
+
"Explore the UI/API workflow as a user: create, edit, archive, empty validation, and persistence after reload or restart.",
|
|
42
|
+
"Do not require Mountaintop's historical filenames as a pass/fail condition unless the user-facing delivered app otherwise fails the workflow."
|
|
43
|
+
]
|
|
44
|
+
}
|
|
45
|
+
}
|
package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/coverage-rules.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"weekStart": "2026-06-15",
|
|
3
|
+
"timezone": "America/New_York",
|
|
4
|
+
"minimumRestHoursBetweenShifts": 10,
|
|
5
|
+
"maxShiftHours": 8,
|
|
6
|
+
"countPendingTimeOffAsWarning": true,
|
|
7
|
+
"roleHierarchy": {
|
|
8
|
+
"manager": ["manager"],
|
|
9
|
+
"shift_lead": ["shift_lead", "manager"],
|
|
10
|
+
"barista": ["barista", "shift_lead", "manager"],
|
|
11
|
+
"cashier": ["cashier", "shift_lead", "manager"]
|
|
12
|
+
},
|
|
13
|
+
"violationTypes": [
|
|
14
|
+
"uncovered_shift",
|
|
15
|
+
"approved_time_off_conflict",
|
|
16
|
+
"pending_time_off_warning",
|
|
17
|
+
"role_mismatch",
|
|
18
|
+
"overlapping_assignments",
|
|
19
|
+
"insufficient_rest",
|
|
20
|
+
"overtime"
|
|
21
|
+
],
|
|
22
|
+
"replacementRanking": [
|
|
23
|
+
"has_required_role",
|
|
24
|
+
"available_for_entire_shift",
|
|
25
|
+
"does_not_create_overlap",
|
|
26
|
+
"keeps_weekly_hours_under_max",
|
|
27
|
+
"preserves_minimum_rest"
|
|
28
|
+
]
|
|
29
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
employee_id,name,primary_role,secondary_roles,max_weekly_hours,preferred_weekly_hours,availability_notes
|
|
2
|
+
E-101,Ava Chen,barista,cashier,32,28,"Cannot open before 07:00 on Wednesday"
|
|
3
|
+
E-102,Mateo Rivera,shift_lead,barista,38,34,"Prefers opening shifts"
|
|
4
|
+
E-103,Priya Shah,manager,shift_lead,40,36,"Unavailable Friday after 15:00"
|
|
5
|
+
E-104,Theo Brooks,barista,cashier,30,24,"Approved time off Wednesday morning"
|
|
6
|
+
E-105,Lina Ortiz,cashier,barista,28,22,"Only available after 10:00 on Tuesday"
|
|
7
|
+
E-106,Samir Patel,barista,,24,20,"Cannot work closing shifts"
|
|
8
|
+
E-107,Nora Kim,shift_lead,cashier,36,30,"Can cover manager duties only with manager approval"
|
package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/existing-schedule.csv
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
assignment_id,shift_id,employee_id,assigned_role,notes
|
|
2
|
+
A-301,S-001,E-101,barista,Only one barista is assigned to the Monday opening rush
|
|
3
|
+
A-302,S-002,E-105,cashier,
|
|
4
|
+
A-303,S-003,E-101,shift_lead,Role mismatch because Ava is not a shift lead
|
|
5
|
+
A-304,S-004,E-104,barista,Conflicts with Theo's approved time off
|
|
6
|
+
A-305,S-005,E-104,cashier,Overlaps with Theo's Wednesday espresso shift
|
|
7
|
+
A-306,S-006,E-106,barista,Pending time off overlaps the second half
|
|
8
|
+
A-307,S-007,E-103,manager,Conflicts with Priya's Friday time off
|
|
9
|
+
A-308,S-007,E-102,barista,Extra coverage but does not satisfy manager requirement
|
package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/shift-requirements.csv
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
shift_id,date,start_time,end_time,area,required_role,min_staff,notes
|
|
2
|
+
S-001,2026-06-15,06:00,12:00,espresso_bar,barista,2,Monday opening rush needs two baristas
|
|
3
|
+
S-002,2026-06-15,09:00,15:00,front_register,cashier,1,Lunch register coverage
|
|
4
|
+
S-003,2026-06-16,14:00,22:00,closing_floor,shift_lead,1,Closing shift needs a shift lead
|
|
5
|
+
S-004,2026-06-17,06:00,12:00,espresso_bar,barista,1,Wednesday opening espresso coverage
|
|
6
|
+
S-005,2026-06-17,08:00,14:00,front_register,cashier,1,Wednesday register coverage
|
|
7
|
+
S-006,2026-06-18,12:00,20:00,kitchen_pickup,barista,1,Afternoon drink handoff station
|
|
8
|
+
S-007,2026-06-19,14:00,22:00,closing_floor,manager,1,Friday close needs manager coverage
|
package/scenarios/shift-coverage-planner/assets/prompt-assets/shift-coverage/time-off-requests.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
request_id,employee_id,date,start_time,end_time,status,reason
|
|
2
|
+
R-201,E-104,2026-06-17,06:00,12:00,approved,Dental appointment
|
|
3
|
+
R-202,E-103,2026-06-19,15:00,22:00,approved,Family event
|
|
4
|
+
R-203,E-106,2026-06-18,16:00,22:00,pending,Class registration
|
|
5
|
+
R-204,E-105,2026-06-16,06:00,10:00,approved,Childcare coverage
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Build a local app that repairs a staff schedule using the CSV and JSON files in prompt-assets/shift-coverage. It should load the employee roster, shift requirements, existing schedule, time-off requests, and coverage rules. The app should show coverage gaps, suggest replacements, preserve constraints, and keep an audit trail of schedule changes. Include import/export behavior so the repaired schedule and audit trail can be reviewed locally.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "ruhroh_scenario_v2",
|
|
3
|
+
"id": "shift-coverage-planner",
|
|
4
|
+
"title": "Shift Coverage Planner",
|
|
5
|
+
"tier": "nightly",
|
|
6
|
+
"kind": "real_user",
|
|
7
|
+
"userPromptPath": "instruction.md",
|
|
8
|
+
"assets": [
|
|
9
|
+
"assets/prompt-assets/shift-coverage"
|
|
10
|
+
],
|
|
11
|
+
"run": {
|
|
12
|
+
"mode": "build",
|
|
13
|
+
"timeoutSeconds": 900
|
|
14
|
+
},
|
|
15
|
+
"requires": {
|
|
16
|
+
"continuity": "workspace_plus_transcript",
|
|
17
|
+
"tools": [
|
|
18
|
+
"filesystem",
|
|
19
|
+
"shell"
|
|
20
|
+
],
|
|
21
|
+
"network": false
|
|
22
|
+
},
|
|
23
|
+
"loop": {
|
|
24
|
+
"defaultMaxIterations": 3,
|
|
25
|
+
"stopPolicy": "goal_satisfied_or_max"
|
|
26
|
+
},
|
|
27
|
+
"evaluation": {
|
|
28
|
+
"mode": "agentic_goal_review",
|
|
29
|
+
"scenarioContext": [
|
|
30
|
+
"This is a complex local app-development task with provided schedule assets.",
|
|
31
|
+
"The prompt-assets/shift-coverage directory is the source material supplied to the run-agent.",
|
|
32
|
+
"The user did not prescribe a framework, route, file layout, or command contract."
|
|
33
|
+
],
|
|
34
|
+
"goalRubric": [
|
|
35
|
+
"The app uses the provided shift-coverage assets as the basis for the schedule workflow.",
|
|
36
|
+
"The app helps a user identify coverage gaps and time-off conflicts.",
|
|
37
|
+
"The app suggests or records replacement assignments while respecting the stated constraints.",
|
|
38
|
+
"The app exposes an audit trail and a way to review or export the repaired schedule.",
|
|
39
|
+
"The final delivered app is usable locally by a reviewer."
|
|
40
|
+
],
|
|
41
|
+
"evidenceGuidance": [
|
|
42
|
+
"Inspect the final workspace, the copied prompt assets, and the implementation choices before judging.",
|
|
43
|
+
"Run local commands or start the app in the eval workspace when that is the most direct way to verify the delivered behavior.",
|
|
44
|
+
"Do not require a specific framework, route path, source filename, or build command unless the user goal itself made that explicit."
|
|
45
|
+
]
|
|
46
|
+
}
|
|
47
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Build me a simple newsletter page with three sample stories. It should work locally in this empty folder.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "ruhroh_scenario_v2",
|
|
3
|
+
"id": "simple-newsletter",
|
|
4
|
+
"title": "Simple Newsletter",
|
|
5
|
+
"tier": "smoke",
|
|
6
|
+
"kind": "real_user",
|
|
7
|
+
"userPromptPath": "instruction.md",
|
|
8
|
+
"run": {
|
|
9
|
+
"mode": "build",
|
|
10
|
+
"timeoutSeconds": 420
|
|
11
|
+
},
|
|
12
|
+
"requires": {
|
|
13
|
+
"continuity": "workspace_plus_transcript",
|
|
14
|
+
"tools": [
|
|
15
|
+
"filesystem",
|
|
16
|
+
"shell"
|
|
17
|
+
],
|
|
18
|
+
"network": false
|
|
19
|
+
},
|
|
20
|
+
"loop": {
|
|
21
|
+
"defaultMaxIterations": 3,
|
|
22
|
+
"stopPolicy": "goal_satisfied_or_max"
|
|
23
|
+
},
|
|
24
|
+
"evaluation": {
|
|
25
|
+
"mode": "agentic_goal_review",
|
|
26
|
+
"scenarioContext": [
|
|
27
|
+
"This is a tiny local-app smoke task.",
|
|
28
|
+
"The user did not prescribe a framework or file layout."
|
|
29
|
+
],
|
|
30
|
+
"goalRubric": [
|
|
31
|
+
"The final workspace provides a locally viewable newsletter page.",
|
|
32
|
+
"The page presents three sample stories or an equivalent clearly visible set of story items.",
|
|
33
|
+
"The delivered result is the requested newsletter experience, not only an untouched starter template or prose-only answer."
|
|
34
|
+
],
|
|
35
|
+
"evidenceGuidance": [
|
|
36
|
+
"Inspect the final workspace and run the app or open the static page if needed.",
|
|
37
|
+
"Use Kestrel transcripts and event logs only as supporting evidence; judge the final delivered state."
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
person_id,full_name,email,company,role,owner,status,last_contacted
|
|
2
|
+
A-1001,Ada Lovelace,ADA@Analytical.Example,Analytical Engines,VP Product,Mira,active,2026-06-01
|
|
3
|
+
A-1002,Grace Hopper,grace.hopper@example.com,Compiler Labs,CTO,Noah,active,2026-05-28
|
|
4
|
+
A-1003,Katherine Johnson,k.johnson@example.com,Orbital Analytics,Director,Eli,active,2026-05-16
|
|
5
|
+
A-1004,Dorothy Vaughan,dorothy.vaughan@example.com,Orbital Analytics,Manager,Eli,nurture,2026-04-30
|
|
6
|
+
A-1005,Margaret Hamilton,mhamilton@example.com,Apollo Systems,Principal Engineer,Rae,active,2026-06-03
|
|
7
|
+
A-1006,Alan Turing,alan.turing@example.com,Manchester Machines,Research Lead,Mira,inactive,2026-03-12
|
|
8
|
+
A-1007,Barbara Liskov,barbara@liskov.example,Distributed Systems Co,Architect,Noah,active,2026-05-22
|
|
9
|
+
A-1008,Radia Perlman,radia.perlman@example.com,Network Bridges,Advisor,Rae,active,2026-05-19
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
contact_key,name,email_address,account,title,success_manager,lifecycle,updated_at
|
|
2
|
+
B-9001,Ada King,ada@analytical.example,Analytical Engines,Chief Product Officer,Mira,customer,2026-06-04
|
|
3
|
+
B-9002,Grace Hopper,grace.hopper@example.com,Compiler Labs,Chief Technology Officer,Noah,customer,2026-06-02
|
|
4
|
+
B-9003,Katherine G. Johnson,k.johnson@example.com,Orbital Analytics,Director of Flight Data,Eli,customer,2026-05-29
|
|
5
|
+
B-9004,Dorothy Vaughan,dorothy.vaughan@example.com,Orbital Analytics,Engineering Manager,Eli,customer,2026-06-01
|
|
6
|
+
B-9005,Margaret Hamilton,margaret.hamilton@example.com,Apollo Systems,Principal Engineer,Rae,customer,2026-06-05
|
|
7
|
+
B-9006,Barbara Liskov,barbara@liskov.example,Distributed Systems Co,Systems Architect,Noah,customer,2026-05-30
|
|
8
|
+
B-9007,Evelyn Boyd Granville,evelyn.granville@example.com,Orbital Analytics,Mathematician,Eli,prospect,2026-05-20
|
|
9
|
+
B-9008,Alan M. Turing,alan.turing@example.com,Manchester Machines,Research Director,Mira,churn_risk,2026-05-17
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Build me a local Vite React + TypeScript app for reconciling two messy people CSV exports from different systems. It should run locally from this empty folder. Use the sample CSV files in prompt-assets/csv-reconciliation-people/source-a.csv and prompt-assets/csv-reconciliation-people/source-b.csv. The app should let me paste or load CSV text for Source A and Source B, normalize names and emails, detect likely duplicate people, flag missing records, show field conflicts, and produce a cleaned merged list. Include the provided sample rows in the app so I can try it immediately without finding my own data. I should be able to resolve conflicts, choose which source wins for a field, mark duplicates as reviewed, filter by conflict type, see reconciliation counts update as I work, persist my review locally, and export the cleaned result as JSON or CSV. Before you finish, make sure the Vite app builds successfully and check the core workflow like a user would: load the sample data, find duplicate people, resolve at least one conflicting field, mark a missing record for inclusion, export the cleaned list, import it back, and confirm the reconciliation counts still make sense.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "ruhroh_scenario_v2",
|
|
3
|
+
"id": "vite-csv-reconciliation",
|
|
4
|
+
"title": "Vite CSV Reconciliation",
|
|
5
|
+
"tier": "nightly",
|
|
6
|
+
"kind": "real_user",
|
|
7
|
+
"userPromptPath": "instruction.md",
|
|
8
|
+
"assets": [
|
|
9
|
+
"assets/prompt-assets/csv-reconciliation-people"
|
|
10
|
+
],
|
|
11
|
+
"run": {
|
|
12
|
+
"mode": "build",
|
|
13
|
+
"timeoutSeconds": 900
|
|
14
|
+
},
|
|
15
|
+
"requires": {
|
|
16
|
+
"continuity": "workspace_plus_transcript",
|
|
17
|
+
"tools": [
|
|
18
|
+
"filesystem",
|
|
19
|
+
"shell"
|
|
20
|
+
],
|
|
21
|
+
"network": true
|
|
22
|
+
},
|
|
23
|
+
"loop": {
|
|
24
|
+
"defaultMaxIterations": 3,
|
|
25
|
+
"stopPolicy": "goal_satisfied_or_max"
|
|
26
|
+
},
|
|
27
|
+
"evaluation": {
|
|
28
|
+
"mode": "agentic_goal_review",
|
|
29
|
+
"scenarioContext": [
|
|
30
|
+
"This is a local data-workflow app-development task with provided CSV assets.",
|
|
31
|
+
"The prompt-assets/csv-reconciliation-people directory is the source material supplied to the run-agent.",
|
|
32
|
+
"The benchmark value is CSV handling, reconciliation workflow, review state, and export/import behavior."
|
|
33
|
+
],
|
|
34
|
+
"goalRubric": [
|
|
35
|
+
"The final app is a Vite React + TypeScript app that can be installed and run locally by a reviewer.",
|
|
36
|
+
"The app uses or embeds the supplied Source A and Source B CSV sample data.",
|
|
37
|
+
"The app normalizes people records, detects likely duplicates, flags missing records, and surfaces field conflicts.",
|
|
38
|
+
"The user can resolve conflicts, choose winning field values, mark duplicates reviewed, and filter by conflict type.",
|
|
39
|
+
"Reconciliation counts update as review decisions change.",
|
|
40
|
+
"Review state persists locally and the cleaned result can be exported as JSON or CSV and imported back."
|
|
41
|
+
],
|
|
42
|
+
"evidenceGuidance": [
|
|
43
|
+
"Inspect the copied prompt assets and final workspace before judging.",
|
|
44
|
+
"Run build/start commands and exercise the reconciliation workflow when practical.",
|
|
45
|
+
"Prefer evidence from app behavior and exported data over source text matches."
|
|
46
|
+
]
|
|
47
|
+
}
|
|
48
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Build me a Vite app for running a small product sprint planning session. It should run locally from this empty folder. The app should have a planning board with tasks grouped by status, a capacity view grouped by owner, and a risks/decisions view for tracking open questions. Seed it with realistic sprint data so I can use it immediately. I should be able to add and edit tasks with title, owner, estimate, priority, and status; move tasks between statuses; add risks or decisions; filter by owner or priority; and see sprint totals update as I work. Remember changes locally when I reload the page. Also include a way to export the sprint data as JSON and import it back into the app. Before you finish, make sure the Vite app builds successfully and check the core workflow like a user would: add a task, move it, filter the board, add a risk or decision, export data, import it back, and confirm the totals still make sense.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "ruhroh_scenario_v2",
|
|
3
|
+
"id": "vite-sprint-planner",
|
|
4
|
+
"title": "Vite Sprint Planner",
|
|
5
|
+
"tier": "nightly",
|
|
6
|
+
"kind": "real_user",
|
|
7
|
+
"userPromptPath": "instruction.md",
|
|
8
|
+
"run": {
|
|
9
|
+
"mode": "build",
|
|
10
|
+
"timeoutSeconds": 900
|
|
11
|
+
},
|
|
12
|
+
"requires": {
|
|
13
|
+
"continuity": "workspace_plus_transcript",
|
|
14
|
+
"tools": [
|
|
15
|
+
"filesystem",
|
|
16
|
+
"shell"
|
|
17
|
+
],
|
|
18
|
+
"network": true
|
|
19
|
+
},
|
|
20
|
+
"loop": {
|
|
21
|
+
"defaultMaxIterations": 3,
|
|
22
|
+
"stopPolicy": "goal_satisfied_or_max"
|
|
23
|
+
},
|
|
24
|
+
"evaluation": {
|
|
25
|
+
"mode": "agentic_goal_review",
|
|
26
|
+
"scenarioContext": [
|
|
27
|
+
"This is a scaffolded Vite local app-development task.",
|
|
28
|
+
"The benchmark value is the product workflow and local state behavior, not a prescribed file layout.",
|
|
29
|
+
"The user explicitly asked the run-agent to build and check the core workflow."
|
|
30
|
+
],
|
|
31
|
+
"goalRubric": [
|
|
32
|
+
"The final app is a Vite app that can be installed and run locally by a reviewer.",
|
|
33
|
+
"The app includes planning board, capacity, and risks/decisions views or equivalent user-visible areas.",
|
|
34
|
+
"Tasks can be added, edited, moved between statuses, filtered, and counted in sprint totals.",
|
|
35
|
+
"Risks or decisions can be added and reviewed.",
|
|
36
|
+
"Sprint data persists locally and can be exported as JSON and imported back.",
|
|
37
|
+
"The implementation is not an untouched starter template."
|
|
38
|
+
],
|
|
39
|
+
"evidenceGuidance": [
|
|
40
|
+
"Run build/start commands in the eval workspace when practical.",
|
|
41
|
+
"Explore the app workflow as a user: task creation, movement, filtering, risk/decision entry, export/import, and totals.",
|
|
42
|
+
"Use source inspection only to understand behavior when browser exploration is unavailable or incomplete."
|
|
43
|
+
]
|
|
44
|
+
}
|
|
45
|
+
}
|