clawbench-cli 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. clawbench/__init__.py +35 -0
  2. clawbench/__main__.py +8 -0
  3. clawbench/batch.py +619 -0
  4. clawbench/cli.py +397 -0
  5. clawbench/data/chrome-extension/README.md +127 -0
  6. clawbench/data/chrome-extension/background.js +50 -0
  7. clawbench/data/chrome-extension/content.js +70 -0
  8. clawbench/data/chrome-extension/manifest.json +25 -0
  9. clawbench/data/chrome-extension/setup.sh +27 -0
  10. clawbench/data/chrome-extension/stealth.js +200 -0
  11. clawbench/data/docker/Dockerfile +51 -0
  12. clawbench/data/docker/entrypoint.sh +394 -0
  13. clawbench/data/docker/setup-openclaw.sh +112 -0
  14. clawbench/data/eval/README.md +95 -0
  15. clawbench/data/eval/agentic_eval.md +53 -0
  16. clawbench/data/extension-server/.python-version +1 -0
  17. clawbench/data/extension-server/README.md +54 -0
  18. clawbench/data/extension-server/pyproject.toml +7 -0
  19. clawbench/data/extension-server/server.py +360 -0
  20. clawbench/data/extension-server/uv.lock +644 -0
  21. clawbench/data/models/model.schema.json +44 -0
  22. clawbench/data/models/models.example.yaml +16 -0
  23. clawbench/data/shared/alex_green_personal_info.json +451 -0
  24. clawbench/data/test-cases/001-daily-life-food-uber-eats/task.json +25 -0
  25. clawbench/data/test-cases/002-daily-life-food-doordash/task.json +25 -0
  26. clawbench/data/test-cases/004-daily-life-food-instacart/extra_info/grocery_list.json +36 -0
  27. clawbench/data/test-cases/004-daily-life-food-instacart/task.json +30 -0
  28. clawbench/data/test-cases/006-daily-life-food-uber-eats/task.json +24 -0
  29. clawbench/data/test-cases/007-daily-life-food-instacart/extra_info/meal_plan.json +21 -0
  30. clawbench/data/test-cases/007-daily-life-food-instacart/task.json +30 -0
  31. clawbench/data/test-cases/011-daily-life-housing-zillow/task.json +25 -0
  32. clawbench/data/test-cases/015-daily-life-housing-craigslist/extra_info/listing_details.json +26 -0
  33. clawbench/data/test-cases/015-daily-life-housing-craigslist/task.json +30 -0
  34. clawbench/data/test-cases/035-daily-life-health-medical-betterhelp/task.json +25 -0
  35. clawbench/data/test-cases/041-daily-life-pets-rover/task.json +25 -0
  36. clawbench/data/test-cases/043-daily-life-pets-rover/extra_info/pet_info.json +12 -0
  37. clawbench/data/test-cases/043-daily-life-pets-rover/task.json +30 -0
  38. clawbench/data/test-cases/045-daily-life-personal-care-booksy/task.json +25 -0
  39. clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/extra_info/address_info.json +7 -0
  40. clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/task.json +30 -0
  41. clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/extra_info/job_links.json +5 -0
  42. clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/task.json +30 -0
  43. clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/extra_info/job_links.json +5 -0
  44. clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/task.json +30 -0
  45. clawbench/data/test-cases/091-job-search-hr-job-apply-indeed/task.json +25 -0
  46. clawbench/data/test-cases/120-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  47. clawbench/data/test-cases/121-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  48. clawbench/data/test-cases/128-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  49. clawbench/data/test-cases/134-office-secretary-tasks-calendar-calendly/task.json +25 -0
  50. clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/extra_info/meeting_details.json +30 -0
  51. clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/task.json +30 -0
  52. clawbench/data/test-cases/139-office-secretary-tasks-calendar-calendly/task.json +25 -0
  53. clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/extra_info/task_list.json +29 -0
  54. clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/task.json +30 -0
  55. clawbench/data/test-cases/179-dev-tech-github-ops-github/extra_info/config.json +13 -0
  56. clawbench/data/test-cases/179-dev-tech-github-ops-github/task.json +30 -0
  57. clawbench/data/test-cases/180-dev-tech-github-ops-github/task.json +25 -0
  58. clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/extra_info/raw_results.json +47 -0
  59. clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/task.json +30 -0
  60. clawbench/data/test-cases/242-academia-research-research-tools-overleaf/task.json +25 -0
  61. clawbench/data/test-cases/246-academia-research-research-tools-zotero/task.json +25 -0
  62. clawbench/data/test-cases/247-academia-research-research-tools-semantic-scholar/task.json +25 -0
  63. clawbench/data/test-cases/265-education-learning-general-coursera/task.json +25 -0
  64. clawbench/data/test-cases/266-education-learning-general-leetcode/extra_info/solution_code.py +9 -0
  65. clawbench/data/test-cases/266-education-learning-general-leetcode/task.json +30 -0
  66. clawbench/data/test-cases/273-education-learning-general-edx/task.json +25 -0
  67. clawbench/data/test-cases/274-education-learning-general-udemy/task.json +25 -0
  68. clawbench/data/test-cases/279-travel-general-airbnb/task.json +25 -0
  69. clawbench/data/test-cases/280-travel-general-booking-com/task.json +25 -0
  70. clawbench/data/test-cases/363-entertainment-hobbies-general-ticketmaster/task.json +25 -0
  71. clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/extra_info/book_list.json +14 -0
  72. clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/task.json +30 -0
  73. clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/extra_info/event_details.json +10 -0
  74. clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/task.json +30 -0
  75. clawbench/data/test-cases/403-personal-management-account-security-1password-web/extra_info/credentials.json +34 -0
  76. clawbench/data/test-cases/403-personal-management-account-security-1password-web/task.json +30 -0
  77. clawbench/data/test-cases/413-personal-management-personal-tools-todoist/extra_info/task_list.json +52 -0
  78. clawbench/data/test-cases/413-personal-management-personal-tools-todoist/task.json +30 -0
  79. clawbench/data/test-cases/468-rating-voting-general-glassdoor/extra_info/interview_experience.json +10 -0
  80. clawbench/data/test-cases/468-rating-voting-general-glassdoor/task.json +30 -0
  81. clawbench/data/test-cases/469-rating-voting-general-tripadvisor/extra_info/review_content.json +6 -0
  82. clawbench/data/test-cases/469-rating-voting-general-tripadvisor/task.json +30 -0
  83. clawbench/data/test-cases/470-rating-voting-general-trustpilot/extra_info/review_content.json +6 -0
  84. clawbench/data/test-cases/470-rating-voting-general-trustpilot/task.json +30 -0
  85. clawbench/data/test-cases/474-rating-voting-general-capterra/task.json +25 -0
  86. clawbench/data/test-cases/475-rating-voting-general-g2/task.json +25 -0
  87. clawbench/data/test-cases/482-creation-init-general-confluence/extra_info/content.json +3 -0
  88. clawbench/data/test-cases/482-creation-init-general-confluence/task.json +30 -0
  89. clawbench/data/test-cases/483-creation-init-general-airtable/task.json +25 -0
  90. clawbench/data/test-cases/484-creation-init-general-clickup/task.json +28 -0
  91. clawbench/data/test-cases/485-creation-init-general-webflow/task.json +25 -0
  92. clawbench/data/test-cases/486-creation-init-general-mailchimp/extra_info/content.json +3 -0
  93. clawbench/data/test-cases/486-creation-init-general-mailchimp/task.json +30 -0
  94. clawbench/data/test-cases/487-creation-init-general-typeform/extra_info/survey_questions.json +85 -0
  95. clawbench/data/test-cases/487-creation-init-general-typeform/task.json +30 -0
  96. clawbench/data/test-cases/488-creation-init-general-substack/extra_info/content.json +3 -0
  97. clawbench/data/test-cases/488-creation-init-general-substack/task.json +30 -0
  98. clawbench/data/test-cases/489-creation-init-general-ghost/extra_info/content.json +3 -0
  99. clawbench/data/test-cases/489-creation-init-general-ghost/task.json +30 -0
  100. clawbench/data/test-cases/501-creation-init-general-asana/extra_info/project_description.json +8 -0
  101. clawbench/data/test-cases/501-creation-init-general-asana/task.json +33 -0
  102. clawbench/data/test-cases/529-daily-life-shopping-delivery-king-arthur-baking/task.json +25 -0
  103. clawbench/data/test-cases/533-daily-life-utilities-inmyarea/task.json +25 -0
  104. clawbench/data/test-cases/535-daily-life-home-home-depot/task.json +25 -0
  105. clawbench/data/test-cases/537-daily-life-food-crumbl/task.json +25 -0
  106. clawbench/data/test-cases/539-daily-life-health-jefit/task.json +25 -0
  107. clawbench/data/test-cases/542-daily-life-pets-wag/task.json +25 -0
  108. clawbench/data/test-cases/551-finance-investment-crypto-wallet-trezor/task.json +25 -0
  109. clawbench/data/test-cases/552-finance-investment-business-payment-plooto/task.json +25 -0
  110. clawbench/data/test-cases/555-finance-investment-insurance-insureon/task.json +25 -0
  111. clawbench/data/test-cases/559-finance-investment-crowdfunding-frontfundr/task.json +25 -0
  112. clawbench/data/test-cases/564-daily-life-event-registration-race-roster/task.json +25 -0
  113. clawbench/data/test-cases/565-job-search-hr-job-search-jopwell/task.json +25 -0
  114. clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/extra_info/listing_details.json +26 -0
  115. clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/task.json +30 -0
  116. clawbench/data/test-cases/569-job-search-hr-job-search-careerbuilder/task.json +25 -0
  117. clawbench/data/test-cases/570-job-search-hr-job-search-hired/task.json +25 -0
  118. clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/extra_info/listing_details.json +26 -0
  119. clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/task.json +30 -0
  120. clawbench/data/test-cases/576-office-secretary-tasks-reports-ftc-reportfraud/task.json +25 -0
  121. clawbench/data/test-cases/583-office-secretary-tasks-support-tickets-freshdesk/task.json +25 -0
  122. clawbench/data/test-cases/598-academia-research-legal-docs-formswift/task.json +25 -0
  123. clawbench/data/test-cases/606-education-learning-kids-courses-outschool/task.json +25 -0
  124. clawbench/data/test-cases/607-education-learning-art-courses-creativebug/task.json +25 -0
  125. clawbench/data/test-cases/609-education-learning-meditation-spirit-rock-meditation-center/task.json +25 -0
  126. clawbench/data/test-cases/615-travel-flights-spirit-airlines/task.json +25 -0
  127. clawbench/data/test-cases/618-travel-train-bus-12go-asia/task.json +25 -0
  128. clawbench/data/test-cases/625-travel-camping-outdoor-parks-canada-reservations/task.json +25 -0
  129. clawbench/data/test-cases/626-travel-bus-flixbus/task.json +25 -0
  130. clawbench/data/test-cases/627-travel-flights-momondo/task.json +25 -0
  131. clawbench/data/test-cases/632-shopping-commerce-beauty-care-olaplex/task.json +25 -0
  132. clawbench/data/test-cases/634-shopping-commerce-apparel-dooney-bourke/task.json +25 -0
  133. clawbench/data/test-cases/635-shopping-commerce-gifts-uncommon-goods/task.json +25 -0
  134. clawbench/data/test-cases/636-shopping-commerce-auto-parts-rockauto/task.json +25 -0
  135. clawbench/data/test-cases/638-shopping-commerce-print-custom-vistaprint/task.json +25 -0
  136. clawbench/data/test-cases/639-shopping-commerce-luxury-mansur-gavriel/task.json +25 -0
  137. clawbench/data/test-cases/671-entertainment-gaming-humble-bundle/task.json +25 -0
  138. clawbench/data/test-cases/672-entertainment-hobbies-anime-streaming-crunchyroll/task.json +25 -0
  139. clawbench/data/test-cases/674-entertainment-hobbies-masterclass-masterclass/task.json +25 -0
  140. clawbench/data/test-cases/676-government-civic-legal-docs-legalnature/task.json +25 -0
  141. clawbench/data/test-cases/685-personal-management-budget-mgmt-everydollar/task.json +25 -0
  142. clawbench/data/test-cases/687-personal-management-vpn-subscription-ipvanish/task.json +25 -0
  143. clawbench/data/test-cases/688-personal-management-insurance-compare-insurify/task.json +25 -0
  144. clawbench/data/test-cases/695-automation-workflows-recurring-order-stumptown-coffee/task.json +25 -0
  145. clawbench/data/test-cases/697-automation-workflows-recurring-order-bean-box/task.json +25 -0
  146. clawbench/data/test-cases/699-automation-workflows-recurring-order-mistobox/task.json +25 -0
  147. clawbench/data/test-cases/700-deletion-revocation-data-deletion-deleteme/task.json +25 -0
  148. clawbench/data/test-cases/705-rating-voting-wine-review-vivino/task.json +25 -0
  149. clawbench/data/test-cases/706-rating-voting-beer-review-beeradvocate/task.json +25 -0
  150. clawbench/data/test-cases/707-rating-voting-social-wine-untappd/task.json +25 -0
  151. clawbench/data/test-cases/708-rating-voting-professor-review-ratemyprofessors/task.json +28 -0
  152. clawbench/data/test-cases/709-rating-voting-service-review-angi/task.json +25 -0
  153. clawbench/data/test-cases/710-creation-init-interior-design-roomsketcher/task.json +25 -0
  154. clawbench/data/test-cases/711-creation-init-color-design-coolors/task.json +25 -0
  155. clawbench/data/test-cases/712-creation-init-website-create-squarespace/task.json +25 -0
  156. clawbench/data/test-cases/713-creation-init-website-build-wix/task.json +25 -0
  157. clawbench/data/test-cases/735-home-services-maintenance-house-cleaning-bark/task.json +25 -0
  158. clawbench/data/test-cases/736-home-services-maintenance-plumbing-ace-hardware/task.json +25 -0
  159. clawbench/data/test-cases/737-home-services-maintenance-kitchen-remodel-lowes/task.json +25 -0
  160. clawbench/data/test-cases/738-home-services-maintenance-equipment-install-amazon-home-services/task.json +25 -0
  161. clawbench/data/test-cases/750-automotive-vehicle-services-car-insurance-compare-kanetix/task.json +25 -0
  162. clawbench/data/test-cases/751-automotive-vehicle-services-car-lease-sixt/task.json +25 -0
  163. clawbench/data/test-cases/754-automotive-vehicle-services-used-car-listing-autotrader/task.json +25 -0
  164. clawbench/data/test-cases/763-automotive-vehicle-services-car-lease-autoslash/task.json +25 -0
  165. clawbench/data/test-cases/766-nonprofit-charity-donation-doctors-without-borders-msf/task.json +25 -0
  166. clawbench/data/test-cases/768-nonprofit-charity-community-crowdfund-ioby/task.json +25 -0
  167. clawbench/data/test-cases/770-nonprofit-charity-volunteer-apply-on-make-a-wish-foundation-website-complete-and-submit-a-volunteer-application-form-selecting-the-wish-granter-role-and-entering-city-phoenix-az/task.json +25 -0
  168. clawbench/data/test-cases/774-nonprofit-charity-nonprofit-job-apply-charity-village/task.json +25 -0
  169. clawbench/data/test-cases/776-nonprofit-charity-volunteer-signup-idealist/task.json +25 -0
  170. clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/extra_info/payment_info.json +3 -0
  171. clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/task.json +30 -0
  172. clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/extra_info/address_info.json +4 -0
  173. clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/task.json +30 -0
  174. clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/extra_info/email_info.json +3 -0
  175. clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/task.json +30 -0
  176. clawbench/data/test-cases/782-beauty-personal-care-skincare-purchase-paulas-choice/task.json +24 -0
  177. clawbench/data/test-cases/783-beauty-personal-care-beauty-booking-ulta-beauty/task.json +24 -0
  178. clawbench/data/test-cases/785-beauty-personal-care-skincare-curology/task.json +25 -0
  179. clawbench/data/test-cases/788-beauty-personal-care-makeup-the-ordinary/task.json +25 -0
  180. clawbench/data/test-cases/789-beauty-personal-care-makeup-fenty-beauty/task.json +25 -0
  181. clawbench/data/test-cases/793-beauty-personal-care-beauty-retail-mac-cosmetics/task.json +25 -0
  182. clawbench/data/test-cases/794-beauty-personal-care-salon-booking-styleseat/task.json +25 -0
  183. clawbench/data/test-cases/795-pet-animal-care-pet-adoption-aspca/task.json +25 -0
  184. clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/extra_info/pet_info.json +12 -0
  185. clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/task.json +30 -0
  186. clawbench/data/test-cases/799-pet-animal-care-pet-insurance-aspca-pet-health-insurance/task.json +25 -0
  187. clawbench/data/test-cases/801-pet-animal-care-pet-friendly-travel-bringfido/task.json +25 -0
  188. clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/extra_info/pet_info.json +12 -0
  189. clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/task.json +30 -0
  190. clawbench/data/test-cases/807-pet-animal-care-pet-dna-embark/task.json +25 -0
  191. clawbench/data/test-cases/809-pet-animal-care-pet-adopt-petfinder/task.json +28 -0
  192. clawbench/data/test-cases/812-pet-animal-care-pet-subscription-ollie/task.json +25 -0
  193. clawbench/data/test-cases/815-personal-management-records-mgmt-myheritage/task.json +25 -0
  194. clawbench/data/test-cases/821-education-learning-reading-self-study-blinkist/task.json +25 -0
  195. clawbench/data/test-cases/861-entertainment-hobbies-movies-cineplex/task.json +25 -0
  196. clawbench/data/test-cases/862-entertainment-hobbies-movies-amc-theatres/task.json +25 -0
  197. clawbench/data/test-cases/864-entertainment-hobbies-show-tickets-ticketmaster/task.json +25 -0
  198. clawbench/data/test-cases/865-travel-outdoor-hipcamp/task.json +25 -0
  199. clawbench/data/test-cases/867-entertainment-hobbies-movies-fandango/task.json +25 -0
  200. clawbench/data/test-cases/872-daily-life-food-opentable/task.json +25 -0
  201. clawbench/data/test-cases/873-daily-life-food-resy/task.json +28 -0
  202. clawbench/data/test-cases/876-entertainment-hobbies-show-tickets-vivid-seats/task.json +25 -0
  203. clawbench/data/test-cases/877-entertainment-hobbies-show-tickets-stubhub/task.json +25 -0
  204. clawbench/data/test-cases/878-travel-outdoor-ontario-parks/task.json +25 -0
  205. clawbench/data/test-cases/883-education-learning-hobby-class-sur-la-table/task.json +25 -0
  206. clawbench/data/test-cases/884-entertainment-hobbies-experience-breakout-games/task.json +25 -0
  207. clawbench/data/test-cases/885-entertainment-hobbies-experience-bowlero/task.json +25 -0
  208. clawbench/data/test-cases/886-entertainment-hobbies-experience-topgolf/task.json +25 -0
  209. clawbench/data/test-cases/lite.json +226 -0
  210. clawbench/data/test-cases/lite.schema.json +105 -0
  211. clawbench/data/test-cases/task.schema.json +132 -0
  212. clawbench/data/tools/build_clawbench_lite_enc.py +161 -0
  213. clawbench/doctor.py +171 -0
  214. clawbench/engine.py +180 -0
  215. clawbench/generate_resume_pdf.py +140 -0
  216. clawbench/hf_upload.py +78 -0
  217. clawbench/image.py +127 -0
  218. clawbench/paths.py +150 -0
  219. clawbench/resume_template.json +104 -0
  220. clawbench/run.py +942 -0
  221. clawbench/tui.py +1401 -0
  222. clawbench_cli-0.1.2.dist-info/METADATA +770 -0
  223. clawbench_cli-0.1.2.dist-info/RECORD +226 -0
  224. clawbench_cli-0.1.2.dist-info/WHEEL +4 -0
  225. clawbench_cli-0.1.2.dist-info/entry_points.txt +4 -0
  226. clawbench_cli-0.1.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 625,
5
+ "metaclass": "travel",
6
+ "class": "camping-outdoor",
7
+ "description": "Register an account on the Parks Canada reservation portal, search for a 3-night campsite at Banff National Park (2 persons, electrical), select an available site and submit the reservation",
8
+ "sites_involved": [
9
+ "reservation.pc.gc.ca"
10
+ ],
11
+ "platform": "parks-canada-reservations",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Register an account on the Parks Canada reservation portal, search for a 3-night campsite at Banff National Park (2 persons, electrical), select an available site and submit the reservation",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 626,
5
+ "metaclass": "travel",
6
+ "class": "bus",
7
+ "description": "Book a one-way FlixBus ticket from New York to Washington D.C. departing 2026-08-15, select the cheapest departure, fill in passenger name and email and complete checkout",
8
+ "sites_involved": [
9
+ "flixbus.com"
10
+ ],
11
+ "platform": "flixbus",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Book a one-way FlixBus ticket from New York to Washington D.C. departing 2026-08-15, select the cheapest departure, fill in passenger name and email and complete checkout",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 627,
5
+ "metaclass": "travel",
6
+ "class": "flights",
7
+ "description": "Search for a one-way economy flight from Toronto to Barcelona departing 2026-09-01 on Momondo, select the cheapest option and proceed to the passenger information page",
8
+ "sites_involved": [
9
+ "momondo.com"
10
+ ],
11
+ "platform": "momondo",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Search for a one-way economy flight from Toronto to Barcelona departing 2026-09-01 on Momondo, select the cheapest option and proceed to the passenger information page",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 632,
5
+ "metaclass": "shopping-commerce",
6
+ "class": "beauty-care",
7
+ "description": "Complete the Hair Quiz on Olaplex, add any quiz-recommended product to the cart, and proceed to checkout",
8
+ "sites_involved": [
9
+ "olaplex.com"
10
+ ],
11
+ "platform": "olaplex",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Complete the Hair Quiz on Olaplex, add any quiz-recommended product to the cart, and proceed to checkout",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 634,
5
+ "metaclass": "shopping-commerce",
6
+ "class": "apparel",
7
+ "description": "Add a Pebble Grain crossbody bag (any color) to the cart on Dooney & Bourke, enter coupon code SAVE10 in the cart and click apply, then proceed to checkout",
8
+ "sites_involved": [
9
+ "dooney.com"
10
+ ],
11
+ "platform": "dooney-bourke",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Add a Pebble Grain crossbody bag (any color) to the cart on Dooney & Bourke, enter coupon code SAVE10 in the cart and click apply, then proceed to checkout",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 635,
5
+ "metaclass": "shopping-commerce",
6
+ "class": "gifts",
7
+ "description": "On Uncommon Goods, add one item under $50 from the \"Gifts for Him\" section to cart and enter a shipping address",
8
+ "sites_involved": [
9
+ "uncommongoods.com"
10
+ ],
11
+ "platform": "uncommon-goods",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Uncommon Goods, add one item under $50 from the \"Gifts for Him\" section to cart and enter a shipping address",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 636,
5
+ "metaclass": "shopping-commerce",
6
+ "class": "auto-parts",
7
+ "description": "On RockAuto, find a front brake rotor for a 2019 Ford F-150, select an Economy grade option, and add it to cart",
8
+ "sites_involved": [
9
+ "rockauto.com"
10
+ ],
11
+ "platform": "rockauto",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On RockAuto, find a front brake rotor for a 2019 Ford F-150, select an Economy grade option, and add it to cart",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 638,
5
+ "metaclass": "shopping-commerce",
6
+ "class": "print-custom",
7
+ "description": "On Vistaprint, design 250 standard business cards using a template, enter custom name, job title, and phone number, then add to cart",
8
+ "sites_involved": [
9
+ "vistaprint.com"
10
+ ],
11
+ "platform": "vistaprint",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Vistaprint, design 250 standard business cards using a template, enter custom name, job title, and phone number, then add to cart",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 639,
5
+ "metaclass": "shopping-commerce",
6
+ "class": "luxury",
7
+ "description": "On Mansur Gavriel, add a Bucket Bag to cart with a selected color and size, then proceed to the checkout page",
8
+ "sites_involved": [
9
+ "mansurgavriel.com"
10
+ ],
11
+ "platform": "mansur-gavriel",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Mansur Gavriel, add a Bucket Bag to cart with a selected color and size, then proceed to the checkout page",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 671,
5
+ "metaclass": "entertainment-hobbies",
6
+ "class": "gaming",
7
+ "description": "Register an account on Humble Bundle, add a currently active game bundle to cart, set custom price to $15, and proceed to checkout",
8
+ "sites_involved": [
9
+ "humblebundle.com"
10
+ ],
11
+ "platform": "humble-bundle",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Register an account on Humble Bundle, add a currently active game bundle to cart, set custom price to $15, and proceed to checkout",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 672,
5
+ "metaclass": "entertainment-hobbies",
6
+ "class": "anime-streaming",
7
+ "description": "Create an account on Crunchyroll and select the Mega Fan monthly subscription plan to complete the registration and payment flow",
8
+ "sites_involved": [
9
+ "crunchyroll.com"
10
+ ],
11
+ "platform": "crunchyroll",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Create an account on Crunchyroll and select the Mega Fan monthly subscription plan to complete the registration and payment flow",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 674,
5
+ "metaclass": "entertainment-hobbies",
6
+ "class": "masterclass",
7
+ "description": "On MasterClass, select the Annual All-Access Pass plan, create an account, and reach the payment page with account and payment details filled in.",
8
+ "sites_involved": [
9
+ "masterclass.com"
10
+ ],
11
+ "platform": "masterclass",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On MasterClass, select the Annual All-Access Pass plan, create an account, and reach the payment page with account and payment details filled in.",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 676,
5
+ "metaclass": "government-civic",
6
+ "class": "legal-docs",
7
+ "description": "On LegalNature, select Rental/Lease Agreement, choose California, fill in property address, landlord name, tenant name, monthly rent, and lease term, then generate the agreement",
8
+ "sites_involved": [
9
+ "legalnature.com"
10
+ ],
11
+ "platform": "legalnature",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On LegalNature, select Rental/Lease Agreement, choose California, fill in property address, landlord name, tenant name, monthly rent, and lease term, then generate the agreement",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 685,
5
+ "metaclass": "personal-management",
6
+ "class": "budget-mgmt",
7
+ "description": "Create a monthly budget on EveryDollar: income $5000, add three expense categories \u2014 Housing $1500, Food $400, Transportation $300",
8
+ "sites_involved": [
9
+ "everydollar.com"
10
+ ],
11
+ "platform": "everydollar",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Create a monthly budget on EveryDollar: income $5000, add three expense categories \u2014 Housing $1500, Food $400, Transportation $300",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 687,
5
+ "metaclass": "personal-management",
6
+ "class": "vpn-subscription",
7
+ "description": "On IPVanish, compare monthly and annual plans, select the annual plan, and fill in the account registration form",
8
+ "sites_involved": [
9
+ "ipvanish.com"
10
+ ],
11
+ "platform": "ipvanish",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On IPVanish, compare monthly and annual plans, select the annual plan, and fill in the account registration form",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 688,
5
+ "metaclass": "personal-management",
6
+ "class": "insurance-compare",
7
+ "description": "On Insurify, enter basic personal information and auto insurance preferences to receive quotes from at least 3 insurance companies",
8
+ "sites_involved": [
9
+ "insurify.com"
10
+ ],
11
+ "platform": "insurify",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Insurify, enter basic personal information and auto insurance preferences to receive quotes from at least 3 insurance companies",
19
+ "eval_schema": {
20
+ "url_pattern": "insurify\\.com/(?!cdn-cgi/)(?!.*static_user_pageview)(?!.*analytics)(?!.*tracking).*(quote|funnel|lead|insurance|compare|application)",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 695,
5
+ "metaclass": "automation-workflows",
6
+ "class": "recurring-order",
7
+ "description": "On Stumptown Coffee's website, select a medium roast coffee bean, set up a monthly subscription delivery for 2 bags, and submit the subscription",
8
+ "sites_involved": [
9
+ "stumptowncoffee.com"
10
+ ],
11
+ "platform": "stumptown-coffee",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Stumptown Coffee's website, select a medium roast coffee bean, set up a monthly subscription delivery for 2 bags, and submit the subscription",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 697,
5
+ "metaclass": "automation-workflows",
6
+ "class": "recurring-order",
7
+ "description": "On Bean Box, select a monthly subscription plan (monthly curated coffee box), choose 6-cup size with ground coffee, and reach the checkout page with shipping and payment details filled in.",
8
+ "sites_involved": [
9
+ "beanbox.com"
10
+ ],
11
+ "platform": "bean-box",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Bean Box, select a monthly subscription plan (monthly curated coffee box), choose 6-cup size with ground coffee, and reach the checkout page with shipping and payment details filled in.",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 699,
5
+ "metaclass": "automation-workflows",
6
+ "class": "recurring-order",
7
+ "description": "On Mistobox, complete a coffee subscription signup: select preferences (roast=medium-dark, grind=whole bean), choose a biweekly delivery plan, fill in shipping info, and submit",
8
+ "sites_involved": [
9
+ "mistobox.com"
10
+ ],
11
+ "platform": "mistobox",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Mistobox, complete a coffee subscription signup: select preferences (roast=medium-dark, grind=whole bean), choose a biweekly delivery plan, fill in shipping info, and submit",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 700,
5
+ "metaclass": "deletion-revocation",
6
+ "class": "data-deletion",
7
+ "description": "On DeleteMe's website, select the Individual (1 person) subscription plan, submit personal information for data removal, and complete checkout",
8
+ "sites_involved": [
9
+ "joindeleteme.com"
10
+ ],
11
+ "platform": "deleteme",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On DeleteMe's website, select the Individual (1 person) subscription plan, submit personal information for data removal, and complete checkout",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 705,
5
+ "metaclass": "rating-voting",
6
+ "class": "wine-review",
7
+ "description": "Submit a 4-star rating for a Pinot Noir on Vivino with exactly this tasting note: \"Light body, cherry notes, smooth finish.\"",
8
+ "sites_involved": [
9
+ "vivino.com"
10
+ ],
11
+ "platform": "vivino",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Submit a 4-star rating for a Pinot Noir on Vivino with exactly this tasting note: \"Light body, cherry notes, smooth finish.\"",
19
+ "eval_schema": {
20
+ "url_pattern": "vivino\\.com/api/vintages/\\d+/reviews",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 706,
5
+ "metaclass": "rating-voting",
6
+ "class": "beer-review",
7
+ "description": "Submit a review for an Imperial Stout on BeerAdvocate with all dimension scores filled and exactly this comment: \"Rich malt, roasty aroma, full body.\"",
8
+ "sites_involved": [
9
+ "beeradvocate.com"
10
+ ],
11
+ "platform": "beeradvocate",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Submit a review for an Imperial Stout on BeerAdvocate with all dimension scores filled and exactly this comment: \"Rich malt, roasty aroma, full body.\"",
19
+ "eval_schema": {
20
+ "url_pattern": "beeradvocate\\.com/beer/rate/\\d+",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 707,
5
+ "metaclass": "rating-voting",
6
+ "class": "social-wine",
7
+ "description": "Check in an IPA on Untappd with the highest star rating, and check out",
8
+ "sites_involved": [
9
+ "untappd.com"
10
+ ],
11
+ "platform": "untappd",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Check in an IPA on Untappd with the highest star rating, and check out",
19
+ "eval_schema": {
20
+ "url_pattern": "^https://untappd\\.com/api/v2/shop/summaries/\\d+",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 708,
5
+ "metaclass": "rating-voting",
6
+ "class": "professor-review",
7
+ "description": "Submit a rating for a Computer Science professor on RateMyProfessors: quality 4, difficulty 3, would take again Yes, with exactly this comment: \"Clear lectures, fair exams.\"",
8
+ "sites_involved": [
9
+ "ratemyprofessors.com"
10
+ ],
11
+ "platform": "ratemyprofessors",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Submit a rating for a Computer Science professor on RateMyProfessors: quality 4, difficulty 3, would take again Yes, with exactly this comment: \"Clear lectures, fair exams.\"",
19
+ "eval_schema": {
20
+ "url_pattern": "ratemyprofessors\\.com/graphql",
21
+ "method": "POST",
22
+ "body": {
23
+ "operationName": "RateTeacherMutation"
24
+ }
25
+ },
26
+ "time_limit": 30,
27
+ "extra_info": []
28
+ }