clawbench-cli 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clawbench/__init__.py +35 -0
- clawbench/__main__.py +8 -0
- clawbench/batch.py +619 -0
- clawbench/cli.py +397 -0
- clawbench/data/chrome-extension/README.md +127 -0
- clawbench/data/chrome-extension/background.js +50 -0
- clawbench/data/chrome-extension/content.js +70 -0
- clawbench/data/chrome-extension/manifest.json +25 -0
- clawbench/data/chrome-extension/setup.sh +27 -0
- clawbench/data/chrome-extension/stealth.js +200 -0
- clawbench/data/docker/Dockerfile +51 -0
- clawbench/data/docker/entrypoint.sh +394 -0
- clawbench/data/docker/setup-openclaw.sh +112 -0
- clawbench/data/eval/README.md +95 -0
- clawbench/data/eval/agentic_eval.md +53 -0
- clawbench/data/extension-server/.python-version +1 -0
- clawbench/data/extension-server/README.md +54 -0
- clawbench/data/extension-server/pyproject.toml +7 -0
- clawbench/data/extension-server/server.py +360 -0
- clawbench/data/extension-server/uv.lock +644 -0
- clawbench/data/models/model.schema.json +44 -0
- clawbench/data/models/models.example.yaml +16 -0
- clawbench/data/shared/alex_green_personal_info.json +451 -0
- clawbench/data/test-cases/001-daily-life-food-uber-eats/task.json +25 -0
- clawbench/data/test-cases/002-daily-life-food-doordash/task.json +25 -0
- clawbench/data/test-cases/004-daily-life-food-instacart/extra_info/grocery_list.json +36 -0
- clawbench/data/test-cases/004-daily-life-food-instacart/task.json +30 -0
- clawbench/data/test-cases/006-daily-life-food-uber-eats/task.json +24 -0
- clawbench/data/test-cases/007-daily-life-food-instacart/extra_info/meal_plan.json +21 -0
- clawbench/data/test-cases/007-daily-life-food-instacart/task.json +30 -0
- clawbench/data/test-cases/011-daily-life-housing-zillow/task.json +25 -0
- clawbench/data/test-cases/015-daily-life-housing-craigslist/extra_info/listing_details.json +26 -0
- clawbench/data/test-cases/015-daily-life-housing-craigslist/task.json +30 -0
- clawbench/data/test-cases/035-daily-life-health-medical-betterhelp/task.json +25 -0
- clawbench/data/test-cases/041-daily-life-pets-rover/task.json +25 -0
- clawbench/data/test-cases/043-daily-life-pets-rover/extra_info/pet_info.json +12 -0
- clawbench/data/test-cases/043-daily-life-pets-rover/task.json +30 -0
- clawbench/data/test-cases/045-daily-life-personal-care-booksy/task.json +25 -0
- clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/extra_info/address_info.json +7 -0
- clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/task.json +30 -0
- clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/extra_info/job_links.json +5 -0
- clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/task.json +30 -0
- clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/extra_info/job_links.json +5 -0
- clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/task.json +30 -0
- clawbench/data/test-cases/091-job-search-hr-job-apply-indeed/task.json +25 -0
- clawbench/data/test-cases/120-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
- clawbench/data/test-cases/121-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
- clawbench/data/test-cases/128-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
- clawbench/data/test-cases/134-office-secretary-tasks-calendar-calendly/task.json +25 -0
- clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/extra_info/meeting_details.json +30 -0
- clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/task.json +30 -0
- clawbench/data/test-cases/139-office-secretary-tasks-calendar-calendly/task.json +25 -0
- clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/extra_info/task_list.json +29 -0
- clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/task.json +30 -0
- clawbench/data/test-cases/179-dev-tech-github-ops-github/extra_info/config.json +13 -0
- clawbench/data/test-cases/179-dev-tech-github-ops-github/task.json +30 -0
- clawbench/data/test-cases/180-dev-tech-github-ops-github/task.json +25 -0
- clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/extra_info/raw_results.json +47 -0
- clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/task.json +30 -0
- clawbench/data/test-cases/242-academia-research-research-tools-overleaf/task.json +25 -0
- clawbench/data/test-cases/246-academia-research-research-tools-zotero/task.json +25 -0
- clawbench/data/test-cases/247-academia-research-research-tools-semantic-scholar/task.json +25 -0
- clawbench/data/test-cases/265-education-learning-general-coursera/task.json +25 -0
- clawbench/data/test-cases/266-education-learning-general-leetcode/extra_info/solution_code.py +9 -0
- clawbench/data/test-cases/266-education-learning-general-leetcode/task.json +30 -0
- clawbench/data/test-cases/273-education-learning-general-edx/task.json +25 -0
- clawbench/data/test-cases/274-education-learning-general-udemy/task.json +25 -0
- clawbench/data/test-cases/279-travel-general-airbnb/task.json +25 -0
- clawbench/data/test-cases/280-travel-general-booking-com/task.json +25 -0
- clawbench/data/test-cases/363-entertainment-hobbies-general-ticketmaster/task.json +25 -0
- clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/extra_info/book_list.json +14 -0
- clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/task.json +30 -0
- clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/extra_info/event_details.json +10 -0
- clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/task.json +30 -0
- clawbench/data/test-cases/403-personal-management-account-security-1password-web/extra_info/credentials.json +34 -0
- clawbench/data/test-cases/403-personal-management-account-security-1password-web/task.json +30 -0
- clawbench/data/test-cases/413-personal-management-personal-tools-todoist/extra_info/task_list.json +52 -0
- clawbench/data/test-cases/413-personal-management-personal-tools-todoist/task.json +30 -0
- clawbench/data/test-cases/468-rating-voting-general-glassdoor/extra_info/interview_experience.json +10 -0
- clawbench/data/test-cases/468-rating-voting-general-glassdoor/task.json +30 -0
- clawbench/data/test-cases/469-rating-voting-general-tripadvisor/extra_info/review_content.json +6 -0
- clawbench/data/test-cases/469-rating-voting-general-tripadvisor/task.json +30 -0
- clawbench/data/test-cases/470-rating-voting-general-trustpilot/extra_info/review_content.json +6 -0
- clawbench/data/test-cases/470-rating-voting-general-trustpilot/task.json +30 -0
- clawbench/data/test-cases/474-rating-voting-general-capterra/task.json +25 -0
- clawbench/data/test-cases/475-rating-voting-general-g2/task.json +25 -0
- clawbench/data/test-cases/482-creation-init-general-confluence/extra_info/content.json +3 -0
- clawbench/data/test-cases/482-creation-init-general-confluence/task.json +30 -0
- clawbench/data/test-cases/483-creation-init-general-airtable/task.json +25 -0
- clawbench/data/test-cases/484-creation-init-general-clickup/task.json +28 -0
- clawbench/data/test-cases/485-creation-init-general-webflow/task.json +25 -0
- clawbench/data/test-cases/486-creation-init-general-mailchimp/extra_info/content.json +3 -0
- clawbench/data/test-cases/486-creation-init-general-mailchimp/task.json +30 -0
- clawbench/data/test-cases/487-creation-init-general-typeform/extra_info/survey_questions.json +85 -0
- clawbench/data/test-cases/487-creation-init-general-typeform/task.json +30 -0
- clawbench/data/test-cases/488-creation-init-general-substack/extra_info/content.json +3 -0
- clawbench/data/test-cases/488-creation-init-general-substack/task.json +30 -0
- clawbench/data/test-cases/489-creation-init-general-ghost/extra_info/content.json +3 -0
- clawbench/data/test-cases/489-creation-init-general-ghost/task.json +30 -0
- clawbench/data/test-cases/501-creation-init-general-asana/extra_info/project_description.json +8 -0
- clawbench/data/test-cases/501-creation-init-general-asana/task.json +33 -0
- clawbench/data/test-cases/529-daily-life-shopping-delivery-king-arthur-baking/task.json +25 -0
- clawbench/data/test-cases/533-daily-life-utilities-inmyarea/task.json +25 -0
- clawbench/data/test-cases/535-daily-life-home-home-depot/task.json +25 -0
- clawbench/data/test-cases/537-daily-life-food-crumbl/task.json +25 -0
- clawbench/data/test-cases/539-daily-life-health-jefit/task.json +25 -0
- clawbench/data/test-cases/542-daily-life-pets-wag/task.json +25 -0
- clawbench/data/test-cases/551-finance-investment-crypto-wallet-trezor/task.json +25 -0
- clawbench/data/test-cases/552-finance-investment-business-payment-plooto/task.json +25 -0
- clawbench/data/test-cases/555-finance-investment-insurance-insureon/task.json +25 -0
- clawbench/data/test-cases/559-finance-investment-crowdfunding-frontfundr/task.json +25 -0
- clawbench/data/test-cases/564-daily-life-event-registration-race-roster/task.json +25 -0
- clawbench/data/test-cases/565-job-search-hr-job-search-jopwell/task.json +25 -0
- clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/extra_info/listing_details.json +26 -0
- clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/task.json +30 -0
- clawbench/data/test-cases/569-job-search-hr-job-search-careerbuilder/task.json +25 -0
- clawbench/data/test-cases/570-job-search-hr-job-search-hired/task.json +25 -0
- clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/extra_info/listing_details.json +26 -0
- clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/task.json +30 -0
- clawbench/data/test-cases/576-office-secretary-tasks-reports-ftc-reportfraud/task.json +25 -0
- clawbench/data/test-cases/583-office-secretary-tasks-support-tickets-freshdesk/task.json +25 -0
- clawbench/data/test-cases/598-academia-research-legal-docs-formswift/task.json +25 -0
- clawbench/data/test-cases/606-education-learning-kids-courses-outschool/task.json +25 -0
- clawbench/data/test-cases/607-education-learning-art-courses-creativebug/task.json +25 -0
- clawbench/data/test-cases/609-education-learning-meditation-spirit-rock-meditation-center/task.json +25 -0
- clawbench/data/test-cases/615-travel-flights-spirit-airlines/task.json +25 -0
- clawbench/data/test-cases/618-travel-train-bus-12go-asia/task.json +25 -0
- clawbench/data/test-cases/625-travel-camping-outdoor-parks-canada-reservations/task.json +25 -0
- clawbench/data/test-cases/626-travel-bus-flixbus/task.json +25 -0
- clawbench/data/test-cases/627-travel-flights-momondo/task.json +25 -0
- clawbench/data/test-cases/632-shopping-commerce-beauty-care-olaplex/task.json +25 -0
- clawbench/data/test-cases/634-shopping-commerce-apparel-dooney-bourke/task.json +25 -0
- clawbench/data/test-cases/635-shopping-commerce-gifts-uncommon-goods/task.json +25 -0
- clawbench/data/test-cases/636-shopping-commerce-auto-parts-rockauto/task.json +25 -0
- clawbench/data/test-cases/638-shopping-commerce-print-custom-vistaprint/task.json +25 -0
- clawbench/data/test-cases/639-shopping-commerce-luxury-mansur-gavriel/task.json +25 -0
- clawbench/data/test-cases/671-entertainment-gaming-humble-bundle/task.json +25 -0
- clawbench/data/test-cases/672-entertainment-hobbies-anime-streaming-crunchyroll/task.json +25 -0
- clawbench/data/test-cases/674-entertainment-hobbies-masterclass-masterclass/task.json +25 -0
- clawbench/data/test-cases/676-government-civic-legal-docs-legalnature/task.json +25 -0
- clawbench/data/test-cases/685-personal-management-budget-mgmt-everydollar/task.json +25 -0
- clawbench/data/test-cases/687-personal-management-vpn-subscription-ipvanish/task.json +25 -0
- clawbench/data/test-cases/688-personal-management-insurance-compare-insurify/task.json +25 -0
- clawbench/data/test-cases/695-automation-workflows-recurring-order-stumptown-coffee/task.json +25 -0
- clawbench/data/test-cases/697-automation-workflows-recurring-order-bean-box/task.json +25 -0
- clawbench/data/test-cases/699-automation-workflows-recurring-order-mistobox/task.json +25 -0
- clawbench/data/test-cases/700-deletion-revocation-data-deletion-deleteme/task.json +25 -0
- clawbench/data/test-cases/705-rating-voting-wine-review-vivino/task.json +25 -0
- clawbench/data/test-cases/706-rating-voting-beer-review-beeradvocate/task.json +25 -0
- clawbench/data/test-cases/707-rating-voting-social-wine-untappd/task.json +25 -0
- clawbench/data/test-cases/708-rating-voting-professor-review-ratemyprofessors/task.json +28 -0
- clawbench/data/test-cases/709-rating-voting-service-review-angi/task.json +25 -0
- clawbench/data/test-cases/710-creation-init-interior-design-roomsketcher/task.json +25 -0
- clawbench/data/test-cases/711-creation-init-color-design-coolors/task.json +25 -0
- clawbench/data/test-cases/712-creation-init-website-create-squarespace/task.json +25 -0
- clawbench/data/test-cases/713-creation-init-website-build-wix/task.json +25 -0
- clawbench/data/test-cases/735-home-services-maintenance-house-cleaning-bark/task.json +25 -0
- clawbench/data/test-cases/736-home-services-maintenance-plumbing-ace-hardware/task.json +25 -0
- clawbench/data/test-cases/737-home-services-maintenance-kitchen-remodel-lowes/task.json +25 -0
- clawbench/data/test-cases/738-home-services-maintenance-equipment-install-amazon-home-services/task.json +25 -0
- clawbench/data/test-cases/750-automotive-vehicle-services-car-insurance-compare-kanetix/task.json +25 -0
- clawbench/data/test-cases/751-automotive-vehicle-services-car-lease-sixt/task.json +25 -0
- clawbench/data/test-cases/754-automotive-vehicle-services-used-car-listing-autotrader/task.json +25 -0
- clawbench/data/test-cases/763-automotive-vehicle-services-car-lease-autoslash/task.json +25 -0
- clawbench/data/test-cases/766-nonprofit-charity-donation-doctors-without-borders-msf/task.json +25 -0
- clawbench/data/test-cases/768-nonprofit-charity-community-crowdfund-ioby/task.json +25 -0
- clawbench/data/test-cases/770-nonprofit-charity-volunteer-apply-on-make-a-wish-foundation-website-complete-and-submit-a-volunteer-application-form-selecting-the-wish-granter-role-and-entering-city-phoenix-az/task.json +25 -0
- clawbench/data/test-cases/774-nonprofit-charity-nonprofit-job-apply-charity-village/task.json +25 -0
- clawbench/data/test-cases/776-nonprofit-charity-volunteer-signup-idealist/task.json +25 -0
- clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/extra_info/payment_info.json +3 -0
- clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/task.json +30 -0
- clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/extra_info/address_info.json +4 -0
- clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/task.json +30 -0
- clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/extra_info/email_info.json +3 -0
- clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/task.json +30 -0
- clawbench/data/test-cases/782-beauty-personal-care-skincare-purchase-paulas-choice/task.json +24 -0
- clawbench/data/test-cases/783-beauty-personal-care-beauty-booking-ulta-beauty/task.json +24 -0
- clawbench/data/test-cases/785-beauty-personal-care-skincare-curology/task.json +25 -0
- clawbench/data/test-cases/788-beauty-personal-care-makeup-the-ordinary/task.json +25 -0
- clawbench/data/test-cases/789-beauty-personal-care-makeup-fenty-beauty/task.json +25 -0
- clawbench/data/test-cases/793-beauty-personal-care-beauty-retail-mac-cosmetics/task.json +25 -0
- clawbench/data/test-cases/794-beauty-personal-care-salon-booking-styleseat/task.json +25 -0
- clawbench/data/test-cases/795-pet-animal-care-pet-adoption-aspca/task.json +25 -0
- clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/extra_info/pet_info.json +12 -0
- clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/task.json +30 -0
- clawbench/data/test-cases/799-pet-animal-care-pet-insurance-aspca-pet-health-insurance/task.json +25 -0
- clawbench/data/test-cases/801-pet-animal-care-pet-friendly-travel-bringfido/task.json +25 -0
- clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/extra_info/pet_info.json +12 -0
- clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/task.json +30 -0
- clawbench/data/test-cases/807-pet-animal-care-pet-dna-embark/task.json +25 -0
- clawbench/data/test-cases/809-pet-animal-care-pet-adopt-petfinder/task.json +28 -0
- clawbench/data/test-cases/812-pet-animal-care-pet-subscription-ollie/task.json +25 -0
- clawbench/data/test-cases/815-personal-management-records-mgmt-myheritage/task.json +25 -0
- clawbench/data/test-cases/821-education-learning-reading-self-study-blinkist/task.json +25 -0
- clawbench/data/test-cases/861-entertainment-hobbies-movies-cineplex/task.json +25 -0
- clawbench/data/test-cases/862-entertainment-hobbies-movies-amc-theatres/task.json +25 -0
- clawbench/data/test-cases/864-entertainment-hobbies-show-tickets-ticketmaster/task.json +25 -0
- clawbench/data/test-cases/865-travel-outdoor-hipcamp/task.json +25 -0
- clawbench/data/test-cases/867-entertainment-hobbies-movies-fandango/task.json +25 -0
- clawbench/data/test-cases/872-daily-life-food-opentable/task.json +25 -0
- clawbench/data/test-cases/873-daily-life-food-resy/task.json +28 -0
- clawbench/data/test-cases/876-entertainment-hobbies-show-tickets-vivid-seats/task.json +25 -0
- clawbench/data/test-cases/877-entertainment-hobbies-show-tickets-stubhub/task.json +25 -0
- clawbench/data/test-cases/878-travel-outdoor-ontario-parks/task.json +25 -0
- clawbench/data/test-cases/883-education-learning-hobby-class-sur-la-table/task.json +25 -0
- clawbench/data/test-cases/884-entertainment-hobbies-experience-breakout-games/task.json +25 -0
- clawbench/data/test-cases/885-entertainment-hobbies-experience-bowlero/task.json +25 -0
- clawbench/data/test-cases/886-entertainment-hobbies-experience-topgolf/task.json +25 -0
- clawbench/data/test-cases/lite.json +226 -0
- clawbench/data/test-cases/lite.schema.json +105 -0
- clawbench/data/test-cases/task.schema.json +132 -0
- clawbench/data/tools/build_clawbench_lite_enc.py +161 -0
- clawbench/doctor.py +171 -0
- clawbench/engine.py +180 -0
- clawbench/generate_resume_pdf.py +140 -0
- clawbench/hf_upload.py +78 -0
- clawbench/image.py +127 -0
- clawbench/paths.py +150 -0
- clawbench/resume_template.json +104 -0
- clawbench/run.py +942 -0
- clawbench/tui.py +1401 -0
- clawbench_cli-0.1.2.dist-info/METADATA +770 -0
- clawbench_cli-0.1.2.dist-info/RECORD +226 -0
- clawbench_cli-0.1.2.dist-info/WHEEL +4 -0
- clawbench_cli-0.1.2.dist-info/entry_points.txt +4 -0
- clawbench_cli-0.1.2.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 535,
|
|
5
|
+
"metaclass": "daily-life",
|
|
6
|
+
"class": "home",
|
|
7
|
+
"description": "On Home Depot, search for a Milwaukee 18V cordless drill, select the highest-rated SKU, add to cart, choose in-store pickup, fill in personal info, and place the order",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"homedepot.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "home-depot",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "On Home Depot, search for a Milwaukee 18V cordless drill, select the highest-rated SKU, add to cart, choose in-store pickup, fill in personal info, and place the order",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 537,
|
|
5
|
+
"metaclass": "daily-life",
|
|
6
|
+
"class": "food",
|
|
7
|
+
"description": "On Crumbl, browse the current week's cookie flavors, add a 6-count box (including at least one Chocolate Chip and one Pink Sugar), select delivery, fill in address, and place the order",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"crumbl.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "crumbl",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "On Crumbl, browse the current week's cookie flavors, add a 6-count box (including at least one Chocolate Chip and one Pink Sugar), select delivery, fill in address, and place the order",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 539,
|
|
5
|
+
"metaclass": "daily-life",
|
|
6
|
+
"class": "health",
|
|
7
|
+
"description": "On Jefit, register a free account, then upgrade to the Elite annual membership plan on the web app and complete the payment flow",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"jefit.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "jefit",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "On Jefit, register a free account, then upgrade to the Elite annual membership plan on the web app and complete the payment flow",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 542,
|
|
5
|
+
"metaclass": "daily-life",
|
|
6
|
+
"class": "pets",
|
|
7
|
+
"description": "On Wag!, book a 30-minute Solo Walk dog walking service, fill in pet info (dog name, breed, weight), select the next day 8–10 AM, and submit the booking",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"wagwalking.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "wag",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "On Wag!, book a 30-minute Solo Walk dog walking service, fill in pet info (dog name, breed, weight), select the next day 8–10 AM, and submit the booking",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "wagwalking\\.com/(api|graphql).*(book|walk|request|order|checkout|schedule)",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 551,
|
|
5
|
+
"metaclass": "finance-investment",
|
|
6
|
+
"class": "crypto-hardware",
|
|
7
|
+
"description": "Add a Trezor Safe 5 hardware wallet to the cart on the Trezor website and proceed to checkout by entering shipping details (name, address, country)",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"trezor.io"
|
|
10
|
+
],
|
|
11
|
+
"platform": "trezor",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Add a Trezor Safe 5 hardware wallet to the cart on the Trezor website and proceed to checkout by entering shipping details (name, address, country)",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 552,
|
|
5
|
+
"metaclass": "finance-investment",
|
|
6
|
+
"class": "business-payment",
|
|
7
|
+
"description": "Register a business payments account on Plooto by filling in company information (company name, industry, monthly payment volume) and completing the sign-up submission",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"plooto.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "plooto",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Register a business payments account on Plooto by filling in company information (company name, industry, monthly payment volume) and completing the sign-up submission",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "api\\.plooto\\.com/v1/user/registerFull",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 555,
|
|
5
|
+
"metaclass": "finance-investment",
|
|
6
|
+
"class": "insurance",
|
|
7
|
+
"description": "Apply for a general liability insurance quote on Insureon for a small consulting firm, enter company name, 5 employees, $200,000 annual revenue, and submit to get a quote",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"insureon.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "insureon",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Apply for a general liability insurance quote on Insureon for a small consulting firm, enter company name, 5 employees, $200,000 annual revenue, and submit to get a quote",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "ua\\.apps\\.insureon\\.com/api/primary_ua/api/success/",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 559,
|
|
5
|
+
"metaclass": "finance-investment",
|
|
6
|
+
"class": "crowdfunding",
|
|
7
|
+
"description": "Register an account on FrontFundr, browse active startup investment offerings, select one, and submit an expression of interest (soft commitment)",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"frontfundr.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "frontfundr",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Register an account on FrontFundr, browse active startup investment offerings, select one, and submit an expression of interest (soft commitment)",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 564,
|
|
5
|
+
"metaclass": "daily-life",
|
|
6
|
+
"class": "event-registration",
|
|
7
|
+
"description": "Search Race Roster for running events in the Toronto area, select a half-marathon, and complete online registration with entry fee payment",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"raceroster.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "race-roster",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Search Race Roster for running events in the Toronto area, select a half-marathon, and complete online registration with entry fee payment",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 565,
|
|
5
|
+
"metaclass": "job-search-hr",
|
|
6
|
+
"class": "job-apply",
|
|
7
|
+
"description": "Create a job seeker profile on Jopwell, fill in education background and career interests, and apply to a diversity fellowship program",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"jopwell.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "jopwell",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Create a job seeker profile on Jopwell, fill in education background and career interests, and apply to a diversity fellowship program",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "platform\\.aboveboard\\.com/api/v1/(users|profile|applications|fellowship)",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/extra_info/listing_details.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"listing_title": "Bright 1BR in Toronto Annex, Furnished, July-August Sublet",
|
|
3
|
+
"description": "Spacious furnished one-bedroom apartment in the heart of Toronto's Annex neighborhood. Walking distance to U of T campus, TTC subway, and Bloor Street shops and restaurants. The unit features hardwood floors, in-suite laundry, a fully equipped kitchen, and a sunny south-facing balcony. Perfect for visiting scholars or summer interns.",
|
|
4
|
+
"price": 2400,
|
|
5
|
+
"currency": "CAD",
|
|
6
|
+
"period": "monthly",
|
|
7
|
+
"available_from": "2026-07-01",
|
|
8
|
+
"available_to": "2026-08-31",
|
|
9
|
+
"furnished": true,
|
|
10
|
+
"bedrooms": 1,
|
|
11
|
+
"bathrooms": 1,
|
|
12
|
+
"amenities": [
|
|
13
|
+
"In-suite laundry",
|
|
14
|
+
"Dishwasher",
|
|
15
|
+
"Balcony",
|
|
16
|
+
"Air conditioning",
|
|
17
|
+
"WiFi included"
|
|
18
|
+
],
|
|
19
|
+
"photos": [
|
|
20
|
+
"living_room.jpg",
|
|
21
|
+
"bedroom.jpg",
|
|
22
|
+
"kitchen.jpg",
|
|
23
|
+
"balcony.jpg"
|
|
24
|
+
],
|
|
25
|
+
"contact_email": "alex.green.uoft@clawbench.cc"
|
|
26
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 566,
|
|
5
|
+
"metaclass": "job-search-hr",
|
|
6
|
+
"class": "job-apply",
|
|
7
|
+
"description": "Register an employer account on ZipRecruiter, post a Software Engineer job listing with salary range and location",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"ziprecruiter.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "ziprecruiter",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Register an employer account on ZipRecruiter, post a Software Engineer job listing with salary range and location",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "ziprecruiter\\.com/quiz/create",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": [
|
|
25
|
+
{
|
|
26
|
+
"path": "extra_info/listing_details.json",
|
|
27
|
+
"description": "Sublet listing details including description, amenities, and photos"
|
|
28
|
+
}
|
|
29
|
+
]
|
|
30
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 569,
|
|
5
|
+
"metaclass": "job-search-hr",
|
|
6
|
+
"class": "job-apply",
|
|
7
|
+
"description": "Upload a resume to CareerBuilder, set a job alert for \"Product Manager\" in Chicago, and complete the job seeker profile",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"careerbuilder.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "careerbuilder",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Upload a resume to CareerBuilder, set a job alert for \"Product Manager\" in Chicago, and complete the job seeker profile",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "appsapi\\.monster\\.io/profiles-profile-app-service/v3/me/job-preferences",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 570,
|
|
5
|
+
"metaclass": "job-search-hr",
|
|
6
|
+
"class": "job-apply",
|
|
7
|
+
"description": "Create a candidate profile on Hired, fill in technical skills and expected salary, and submit profile for employer matching",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"hired.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "hired",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Create a candidate profile on Hired, fill in technical skills and expected salary, and submit profile for employer matching",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "api\\.adecco\\.com/aamBackend/tenants?/133/users?/[^/]+/userSkills",
|
|
21
|
+
"method": "PATCH"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"listing_title": "Bright 1BR in Toronto Annex, Furnished, July-August Sublet",
|
|
3
|
+
"description": "Spacious furnished one-bedroom apartment in the heart of Toronto's Annex neighborhood. Walking distance to U of T campus, TTC subway, and Bloor Street shops and restaurants. The unit features hardwood floors, in-suite laundry, a fully equipped kitchen, and a sunny south-facing balcony. Perfect for visiting scholars or summer interns.",
|
|
4
|
+
"price": 2400,
|
|
5
|
+
"currency": "CAD",
|
|
6
|
+
"period": "monthly",
|
|
7
|
+
"available_from": "2026-07-01",
|
|
8
|
+
"available_to": "2026-08-31",
|
|
9
|
+
"furnished": true,
|
|
10
|
+
"bedrooms": 1,
|
|
11
|
+
"bathrooms": 1,
|
|
12
|
+
"amenities": [
|
|
13
|
+
"In-suite laundry",
|
|
14
|
+
"Dishwasher",
|
|
15
|
+
"Balcony",
|
|
16
|
+
"Air conditioning",
|
|
17
|
+
"WiFi included"
|
|
18
|
+
],
|
|
19
|
+
"photos": [
|
|
20
|
+
"living_room.jpg",
|
|
21
|
+
"bedroom.jpg",
|
|
22
|
+
"kitchen.jpg",
|
|
23
|
+
"balcony.jpg"
|
|
24
|
+
],
|
|
25
|
+
"contact_email": "alex.green.uoft@clawbench.cc"
|
|
26
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 571,
|
|
5
|
+
"metaclass": "job-search-hr",
|
|
6
|
+
"class": "recruitment-mgmt",
|
|
7
|
+
"description": "Sign up for a Workable free trial, create a new job position, and publish it to the Workable careers page",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"workable.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "workable",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Sign up for a Workable free trial, create a new job position, and publish it to the Workable careers page",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "workable\\.com.*(jobs?/(create|publish|new|draft)|api/(spi|v\\d+)/jobs|careers/publish|/openings)",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": [
|
|
25
|
+
{
|
|
26
|
+
"path": "extra_info/listing_details.json",
|
|
27
|
+
"description": "Sublet listing details including description, amenities, and photos"
|
|
28
|
+
}
|
|
29
|
+
]
|
|
30
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 576,
|
|
5
|
+
"metaclass": "office-secretary-tasks",
|
|
6
|
+
"class": "report-filing",
|
|
7
|
+
"description": "Use FTC ReportFraud to file a report about an online scam: describe receiving a fake prize email demanding $500 in fees, complete the full report form and submit",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"reportfraud.ftc.gov"
|
|
10
|
+
],
|
|
11
|
+
"platform": "ftc-reportfraud",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Use FTC ReportFraud to file a report about an online scam: describe receiving a fake prize email demanding $500 in fees, complete the full report form and submit",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "https?://reportfraud\\.ftc\\.gov/.*(api|submit|report|complaint)",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 583,
|
|
5
|
+
"metaclass": "office-secretary-tasks",
|
|
6
|
+
"class": "customer-support",
|
|
7
|
+
"description": "Register a free Freshdesk Sprout account, create a new support ticket with subject \"Invoice Dispute - Order #10042\", describe a billing discrepancy, set priority to High, and assign to a test agent",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"freshdesk.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "freshdesk",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Register a free Freshdesk Sprout account, create a new support ticket with subject \"Invoice Dispute - Order #10042\", describe a billing discrepancy, set priority to High, and assign to a test agent",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "freshdesk\\.com/api/_/tickets",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 598,
|
|
5
|
+
"metaclass": "academia-research",
|
|
6
|
+
"class": "legal-docs",
|
|
7
|
+
"description": "On FormSwift, search for a \"Power of Attorney\" template, fill in the principal's name, agent's name, and scope of authority, then download the completed document",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"formswift.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "formswift",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "On FormSwift, search for a \"Power of Attorney\" template, fill in the principal's name, agent's name, and scope of authority, then download the completed document",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "api\\.formswift\\.com/static/document/save/",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 606,
|
|
5
|
+
"metaclass": "education-learning",
|
|
6
|
+
"class": "kids-courses",
|
|
7
|
+
"description": "Search for a beginner Python coding class for kids ages 10\u201312 on Outschool, select a session, and complete enrollment with payment",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"outschool.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "outschool",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Search for a beginner Python coding class for kids ages 10\u201312 on Outschool, select a session, and complete enrollment with payment",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "outschool\\.com/graphql/AcceptPaymentWithStripe",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 607,
|
|
5
|
+
"metaclass": "education-learning",
|
|
6
|
+
"class": "art-courses",
|
|
7
|
+
"description": "Sign up for the Creativebug free trial, browse Drawing & Illustration classes, select a beginner course, and begin watching the first lesson",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"creativebug.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "creativebug",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Sign up for the Creativebug free trial, browse Drawing & Illustration classes, select a beginner course, and begin watching the first lesson",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "creativebug\\.com/trial/confirmation",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
clawbench/data/test-cases/609-education-learning-meditation-spirit-rock-meditation-center/task.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 609,
|
|
5
|
+
"metaclass": "education-learning",
|
|
6
|
+
"class": "meditation",
|
|
7
|
+
"description": "Browse upcoming weekend retreat offerings on the Spirit Rock Meditation Center website, select one session, and complete the registration",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"spiritrock.org"
|
|
10
|
+
],
|
|
11
|
+
"platform": "spirit-rock",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "Browse upcoming weekend retreat offerings on the Spirit Rock Meditation Center website, select one session, and complete the registration",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "spirit-rock\\.secure\\.retreat\\.guru/program/.*\\?form=1",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 615,
|
|
5
|
+
"metaclass": "travel",
|
|
6
|
+
"class": "flights",
|
|
7
|
+
"description": "On Spirit Airlines, search for a bare-fare one-way flight from Fort Lauderdale to San Juan, select the cheapest option, add a carry-on bag, and proceed to payment",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"spirit.com"
|
|
10
|
+
],
|
|
11
|
+
"platform": "spirit-airlines",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "On Spirit Airlines, search for a bare-fare one-way flight from Fort Lauderdale to San Juan, select the cheapest option, add a carry-on bag, and proceed to payment",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "../task.schema.json",
|
|
3
|
+
"metadata": {
|
|
4
|
+
"task_id": 618,
|
|
5
|
+
"metaclass": "travel",
|
|
6
|
+
"class": "train-bus",
|
|
7
|
+
"description": "On 12Go Asia, book a train ticket from Bangkok to Chiang Mai (sleeper class), fill in passenger details, and proceed to the payment page",
|
|
8
|
+
"sites_involved": [
|
|
9
|
+
"12go.asia"
|
|
10
|
+
],
|
|
11
|
+
"platform": "12go-asia",
|
|
12
|
+
"common_info": {
|
|
13
|
+
"email_credentials": "credentials to use the assigned disposable email account",
|
|
14
|
+
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
|
|
15
|
+
"user_resume": "PDF resume with disposable email account injected"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"instruction": "On 12Go Asia, book a train ticket from Bangkok to Chiang Mai (sleeper class), fill in passenger details, and proceed to the payment page",
|
|
19
|
+
"eval_schema": {
|
|
20
|
+
"url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
|
|
21
|
+
"method": "POST"
|
|
22
|
+
},
|
|
23
|
+
"time_limit": 30,
|
|
24
|
+
"extra_info": []
|
|
25
|
+
}
|