clawbench-cli 0.1.2 (clawbench_cli-0.1.2-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226)
  1. clawbench/__init__.py +35 -0
  2. clawbench/__main__.py +8 -0
  3. clawbench/batch.py +619 -0
  4. clawbench/cli.py +397 -0
  5. clawbench/data/chrome-extension/README.md +127 -0
  6. clawbench/data/chrome-extension/background.js +50 -0
  7. clawbench/data/chrome-extension/content.js +70 -0
  8. clawbench/data/chrome-extension/manifest.json +25 -0
  9. clawbench/data/chrome-extension/setup.sh +27 -0
  10. clawbench/data/chrome-extension/stealth.js +200 -0
  11. clawbench/data/docker/Dockerfile +51 -0
  12. clawbench/data/docker/entrypoint.sh +394 -0
  13. clawbench/data/docker/setup-openclaw.sh +112 -0
  14. clawbench/data/eval/README.md +95 -0
  15. clawbench/data/eval/agentic_eval.md +53 -0
  16. clawbench/data/extension-server/.python-version +1 -0
  17. clawbench/data/extension-server/README.md +54 -0
  18. clawbench/data/extension-server/pyproject.toml +7 -0
  19. clawbench/data/extension-server/server.py +360 -0
  20. clawbench/data/extension-server/uv.lock +644 -0
  21. clawbench/data/models/model.schema.json +44 -0
  22. clawbench/data/models/models.example.yaml +16 -0
  23. clawbench/data/shared/alex_green_personal_info.json +451 -0
  24. clawbench/data/test-cases/001-daily-life-food-uber-eats/task.json +25 -0
  25. clawbench/data/test-cases/002-daily-life-food-doordash/task.json +25 -0
  26. clawbench/data/test-cases/004-daily-life-food-instacart/extra_info/grocery_list.json +36 -0
  27. clawbench/data/test-cases/004-daily-life-food-instacart/task.json +30 -0
  28. clawbench/data/test-cases/006-daily-life-food-uber-eats/task.json +24 -0
  29. clawbench/data/test-cases/007-daily-life-food-instacart/extra_info/meal_plan.json +21 -0
  30. clawbench/data/test-cases/007-daily-life-food-instacart/task.json +30 -0
  31. clawbench/data/test-cases/011-daily-life-housing-zillow/task.json +25 -0
  32. clawbench/data/test-cases/015-daily-life-housing-craigslist/extra_info/listing_details.json +26 -0
  33. clawbench/data/test-cases/015-daily-life-housing-craigslist/task.json +30 -0
  34. clawbench/data/test-cases/035-daily-life-health-medical-betterhelp/task.json +25 -0
  35. clawbench/data/test-cases/041-daily-life-pets-rover/task.json +25 -0
  36. clawbench/data/test-cases/043-daily-life-pets-rover/extra_info/pet_info.json +12 -0
  37. clawbench/data/test-cases/043-daily-life-pets-rover/task.json +30 -0
  38. clawbench/data/test-cases/045-daily-life-personal-care-booksy/task.json +25 -0
  39. clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/extra_info/address_info.json +7 -0
  40. clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/task.json +30 -0
  41. clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/extra_info/job_links.json +5 -0
  42. clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/task.json +30 -0
  43. clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/extra_info/job_links.json +5 -0
  44. clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/task.json +30 -0
  45. clawbench/data/test-cases/091-job-search-hr-job-apply-indeed/task.json +25 -0
  46. clawbench/data/test-cases/120-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  47. clawbench/data/test-cases/121-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  48. clawbench/data/test-cases/128-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  49. clawbench/data/test-cases/134-office-secretary-tasks-calendar-calendly/task.json +25 -0
  50. clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/extra_info/meeting_details.json +30 -0
  51. clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/task.json +30 -0
  52. clawbench/data/test-cases/139-office-secretary-tasks-calendar-calendly/task.json +25 -0
  53. clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/extra_info/task_list.json +29 -0
  54. clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/task.json +30 -0
  55. clawbench/data/test-cases/179-dev-tech-github-ops-github/extra_info/config.json +13 -0
  56. clawbench/data/test-cases/179-dev-tech-github-ops-github/task.json +30 -0
  57. clawbench/data/test-cases/180-dev-tech-github-ops-github/task.json +25 -0
  58. clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/extra_info/raw_results.json +47 -0
  59. clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/task.json +30 -0
  60. clawbench/data/test-cases/242-academia-research-research-tools-overleaf/task.json +25 -0
  61. clawbench/data/test-cases/246-academia-research-research-tools-zotero/task.json +25 -0
  62. clawbench/data/test-cases/247-academia-research-research-tools-semantic-scholar/task.json +25 -0
  63. clawbench/data/test-cases/265-education-learning-general-coursera/task.json +25 -0
  64. clawbench/data/test-cases/266-education-learning-general-leetcode/extra_info/solution_code.py +9 -0
  65. clawbench/data/test-cases/266-education-learning-general-leetcode/task.json +30 -0
  66. clawbench/data/test-cases/273-education-learning-general-edx/task.json +25 -0
  67. clawbench/data/test-cases/274-education-learning-general-udemy/task.json +25 -0
  68. clawbench/data/test-cases/279-travel-general-airbnb/task.json +25 -0
  69. clawbench/data/test-cases/280-travel-general-booking-com/task.json +25 -0
  70. clawbench/data/test-cases/363-entertainment-hobbies-general-ticketmaster/task.json +25 -0
  71. clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/extra_info/book_list.json +14 -0
  72. clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/task.json +30 -0
  73. clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/extra_info/event_details.json +10 -0
  74. clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/task.json +30 -0
  75. clawbench/data/test-cases/403-personal-management-account-security-1password-web/extra_info/credentials.json +34 -0
  76. clawbench/data/test-cases/403-personal-management-account-security-1password-web/task.json +30 -0
  77. clawbench/data/test-cases/413-personal-management-personal-tools-todoist/extra_info/task_list.json +52 -0
  78. clawbench/data/test-cases/413-personal-management-personal-tools-todoist/task.json +30 -0
  79. clawbench/data/test-cases/468-rating-voting-general-glassdoor/extra_info/interview_experience.json +10 -0
  80. clawbench/data/test-cases/468-rating-voting-general-glassdoor/task.json +30 -0
  81. clawbench/data/test-cases/469-rating-voting-general-tripadvisor/extra_info/review_content.json +6 -0
  82. clawbench/data/test-cases/469-rating-voting-general-tripadvisor/task.json +30 -0
  83. clawbench/data/test-cases/470-rating-voting-general-trustpilot/extra_info/review_content.json +6 -0
  84. clawbench/data/test-cases/470-rating-voting-general-trustpilot/task.json +30 -0
  85. clawbench/data/test-cases/474-rating-voting-general-capterra/task.json +25 -0
  86. clawbench/data/test-cases/475-rating-voting-general-g2/task.json +25 -0
  87. clawbench/data/test-cases/482-creation-init-general-confluence/extra_info/content.json +3 -0
  88. clawbench/data/test-cases/482-creation-init-general-confluence/task.json +30 -0
  89. clawbench/data/test-cases/483-creation-init-general-airtable/task.json +25 -0
  90. clawbench/data/test-cases/484-creation-init-general-clickup/task.json +28 -0
  91. clawbench/data/test-cases/485-creation-init-general-webflow/task.json +25 -0
  92. clawbench/data/test-cases/486-creation-init-general-mailchimp/extra_info/content.json +3 -0
  93. clawbench/data/test-cases/486-creation-init-general-mailchimp/task.json +30 -0
  94. clawbench/data/test-cases/487-creation-init-general-typeform/extra_info/survey_questions.json +85 -0
  95. clawbench/data/test-cases/487-creation-init-general-typeform/task.json +30 -0
  96. clawbench/data/test-cases/488-creation-init-general-substack/extra_info/content.json +3 -0
  97. clawbench/data/test-cases/488-creation-init-general-substack/task.json +30 -0
  98. clawbench/data/test-cases/489-creation-init-general-ghost/extra_info/content.json +3 -0
  99. clawbench/data/test-cases/489-creation-init-general-ghost/task.json +30 -0
  100. clawbench/data/test-cases/501-creation-init-general-asana/extra_info/project_description.json +8 -0
  101. clawbench/data/test-cases/501-creation-init-general-asana/task.json +33 -0
  102. clawbench/data/test-cases/529-daily-life-shopping-delivery-king-arthur-baking/task.json +25 -0
  103. clawbench/data/test-cases/533-daily-life-utilities-inmyarea/task.json +25 -0
  104. clawbench/data/test-cases/535-daily-life-home-home-depot/task.json +25 -0
  105. clawbench/data/test-cases/537-daily-life-food-crumbl/task.json +25 -0
  106. clawbench/data/test-cases/539-daily-life-health-jefit/task.json +25 -0
  107. clawbench/data/test-cases/542-daily-life-pets-wag/task.json +25 -0
  108. clawbench/data/test-cases/551-finance-investment-crypto-wallet-trezor/task.json +25 -0
  109. clawbench/data/test-cases/552-finance-investment-business-payment-plooto/task.json +25 -0
  110. clawbench/data/test-cases/555-finance-investment-insurance-insureon/task.json +25 -0
  111. clawbench/data/test-cases/559-finance-investment-crowdfunding-frontfundr/task.json +25 -0
  112. clawbench/data/test-cases/564-daily-life-event-registration-race-roster/task.json +25 -0
  113. clawbench/data/test-cases/565-job-search-hr-job-search-jopwell/task.json +25 -0
  114. clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/extra_info/listing_details.json +26 -0
  115. clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/task.json +30 -0
  116. clawbench/data/test-cases/569-job-search-hr-job-search-careerbuilder/task.json +25 -0
  117. clawbench/data/test-cases/570-job-search-hr-job-search-hired/task.json +25 -0
  118. clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/extra_info/listing_details.json +26 -0
  119. clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/task.json +30 -0
  120. clawbench/data/test-cases/576-office-secretary-tasks-reports-ftc-reportfraud/task.json +25 -0
  121. clawbench/data/test-cases/583-office-secretary-tasks-support-tickets-freshdesk/task.json +25 -0
  122. clawbench/data/test-cases/598-academia-research-legal-docs-formswift/task.json +25 -0
  123. clawbench/data/test-cases/606-education-learning-kids-courses-outschool/task.json +25 -0
  124. clawbench/data/test-cases/607-education-learning-art-courses-creativebug/task.json +25 -0
  125. clawbench/data/test-cases/609-education-learning-meditation-spirit-rock-meditation-center/task.json +25 -0
  126. clawbench/data/test-cases/615-travel-flights-spirit-airlines/task.json +25 -0
  127. clawbench/data/test-cases/618-travel-train-bus-12go-asia/task.json +25 -0
  128. clawbench/data/test-cases/625-travel-camping-outdoor-parks-canada-reservations/task.json +25 -0
  129. clawbench/data/test-cases/626-travel-bus-flixbus/task.json +25 -0
  130. clawbench/data/test-cases/627-travel-flights-momondo/task.json +25 -0
  131. clawbench/data/test-cases/632-shopping-commerce-beauty-care-olaplex/task.json +25 -0
  132. clawbench/data/test-cases/634-shopping-commerce-apparel-dooney-bourke/task.json +25 -0
  133. clawbench/data/test-cases/635-shopping-commerce-gifts-uncommon-goods/task.json +25 -0
  134. clawbench/data/test-cases/636-shopping-commerce-auto-parts-rockauto/task.json +25 -0
  135. clawbench/data/test-cases/638-shopping-commerce-print-custom-vistaprint/task.json +25 -0
  136. clawbench/data/test-cases/639-shopping-commerce-luxury-mansur-gavriel/task.json +25 -0
  137. clawbench/data/test-cases/671-entertainment-gaming-humble-bundle/task.json +25 -0
  138. clawbench/data/test-cases/672-entertainment-hobbies-anime-streaming-crunchyroll/task.json +25 -0
  139. clawbench/data/test-cases/674-entertainment-hobbies-masterclass-masterclass/task.json +25 -0
  140. clawbench/data/test-cases/676-government-civic-legal-docs-legalnature/task.json +25 -0
  141. clawbench/data/test-cases/685-personal-management-budget-mgmt-everydollar/task.json +25 -0
  142. clawbench/data/test-cases/687-personal-management-vpn-subscription-ipvanish/task.json +25 -0
  143. clawbench/data/test-cases/688-personal-management-insurance-compare-insurify/task.json +25 -0
  144. clawbench/data/test-cases/695-automation-workflows-recurring-order-stumptown-coffee/task.json +25 -0
  145. clawbench/data/test-cases/697-automation-workflows-recurring-order-bean-box/task.json +25 -0
  146. clawbench/data/test-cases/699-automation-workflows-recurring-order-mistobox/task.json +25 -0
  147. clawbench/data/test-cases/700-deletion-revocation-data-deletion-deleteme/task.json +25 -0
  148. clawbench/data/test-cases/705-rating-voting-wine-review-vivino/task.json +25 -0
  149. clawbench/data/test-cases/706-rating-voting-beer-review-beeradvocate/task.json +25 -0
  150. clawbench/data/test-cases/707-rating-voting-social-wine-untappd/task.json +25 -0
  151. clawbench/data/test-cases/708-rating-voting-professor-review-ratemyprofessors/task.json +28 -0
  152. clawbench/data/test-cases/709-rating-voting-service-review-angi/task.json +25 -0
  153. clawbench/data/test-cases/710-creation-init-interior-design-roomsketcher/task.json +25 -0
  154. clawbench/data/test-cases/711-creation-init-color-design-coolors/task.json +25 -0
  155. clawbench/data/test-cases/712-creation-init-website-create-squarespace/task.json +25 -0
  156. clawbench/data/test-cases/713-creation-init-website-build-wix/task.json +25 -0
  157. clawbench/data/test-cases/735-home-services-maintenance-house-cleaning-bark/task.json +25 -0
  158. clawbench/data/test-cases/736-home-services-maintenance-plumbing-ace-hardware/task.json +25 -0
  159. clawbench/data/test-cases/737-home-services-maintenance-kitchen-remodel-lowes/task.json +25 -0
  160. clawbench/data/test-cases/738-home-services-maintenance-equipment-install-amazon-home-services/task.json +25 -0
  161. clawbench/data/test-cases/750-automotive-vehicle-services-car-insurance-compare-kanetix/task.json +25 -0
  162. clawbench/data/test-cases/751-automotive-vehicle-services-car-lease-sixt/task.json +25 -0
  163. clawbench/data/test-cases/754-automotive-vehicle-services-used-car-listing-autotrader/task.json +25 -0
  164. clawbench/data/test-cases/763-automotive-vehicle-services-car-lease-autoslash/task.json +25 -0
  165. clawbench/data/test-cases/766-nonprofit-charity-donation-doctors-without-borders-msf/task.json +25 -0
  166. clawbench/data/test-cases/768-nonprofit-charity-community-crowdfund-ioby/task.json +25 -0
  167. clawbench/data/test-cases/770-nonprofit-charity-volunteer-apply-on-make-a-wish-foundation-website-complete-and-submit-a-volunteer-application-form-selecting-the-wish-granter-role-and-entering-city-phoenix-az/task.json +25 -0
  168. clawbench/data/test-cases/774-nonprofit-charity-nonprofit-job-apply-charity-village/task.json +25 -0
  169. clawbench/data/test-cases/776-nonprofit-charity-volunteer-signup-idealist/task.json +25 -0
  170. clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/extra_info/payment_info.json +3 -0
  171. clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/task.json +30 -0
  172. clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/extra_info/address_info.json +4 -0
  173. clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/task.json +30 -0
  174. clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/extra_info/email_info.json +3 -0
  175. clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/task.json +30 -0
  176. clawbench/data/test-cases/782-beauty-personal-care-skincare-purchase-paulas-choice/task.json +24 -0
  177. clawbench/data/test-cases/783-beauty-personal-care-beauty-booking-ulta-beauty/task.json +24 -0
  178. clawbench/data/test-cases/785-beauty-personal-care-skincare-curology/task.json +25 -0
  179. clawbench/data/test-cases/788-beauty-personal-care-makeup-the-ordinary/task.json +25 -0
  180. clawbench/data/test-cases/789-beauty-personal-care-makeup-fenty-beauty/task.json +25 -0
  181. clawbench/data/test-cases/793-beauty-personal-care-beauty-retail-mac-cosmetics/task.json +25 -0
  182. clawbench/data/test-cases/794-beauty-personal-care-salon-booking-styleseat/task.json +25 -0
  183. clawbench/data/test-cases/795-pet-animal-care-pet-adoption-aspca/task.json +25 -0
  184. clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/extra_info/pet_info.json +12 -0
  185. clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/task.json +30 -0
  186. clawbench/data/test-cases/799-pet-animal-care-pet-insurance-aspca-pet-health-insurance/task.json +25 -0
  187. clawbench/data/test-cases/801-pet-animal-care-pet-friendly-travel-bringfido/task.json +25 -0
  188. clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/extra_info/pet_info.json +12 -0
  189. clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/task.json +30 -0
  190. clawbench/data/test-cases/807-pet-animal-care-pet-dna-embark/task.json +25 -0
  191. clawbench/data/test-cases/809-pet-animal-care-pet-adopt-petfinder/task.json +28 -0
  192. clawbench/data/test-cases/812-pet-animal-care-pet-subscription-ollie/task.json +25 -0
  193. clawbench/data/test-cases/815-personal-management-records-mgmt-myheritage/task.json +25 -0
  194. clawbench/data/test-cases/821-education-learning-reading-self-study-blinkist/task.json +25 -0
  195. clawbench/data/test-cases/861-entertainment-hobbies-movies-cineplex/task.json +25 -0
  196. clawbench/data/test-cases/862-entertainment-hobbies-movies-amc-theatres/task.json +25 -0
  197. clawbench/data/test-cases/864-entertainment-hobbies-show-tickets-ticketmaster/task.json +25 -0
  198. clawbench/data/test-cases/865-travel-outdoor-hipcamp/task.json +25 -0
  199. clawbench/data/test-cases/867-entertainment-hobbies-movies-fandango/task.json +25 -0
  200. clawbench/data/test-cases/872-daily-life-food-opentable/task.json +25 -0
  201. clawbench/data/test-cases/873-daily-life-food-resy/task.json +28 -0
  202. clawbench/data/test-cases/876-entertainment-hobbies-show-tickets-vivid-seats/task.json +25 -0
  203. clawbench/data/test-cases/877-entertainment-hobbies-show-tickets-stubhub/task.json +25 -0
  204. clawbench/data/test-cases/878-travel-outdoor-ontario-parks/task.json +25 -0
  205. clawbench/data/test-cases/883-education-learning-hobby-class-sur-la-table/task.json +25 -0
  206. clawbench/data/test-cases/884-entertainment-hobbies-experience-breakout-games/task.json +25 -0
  207. clawbench/data/test-cases/885-entertainment-hobbies-experience-bowlero/task.json +25 -0
  208. clawbench/data/test-cases/886-entertainment-hobbies-experience-topgolf/task.json +25 -0
  209. clawbench/data/test-cases/lite.json +226 -0
  210. clawbench/data/test-cases/lite.schema.json +105 -0
  211. clawbench/data/test-cases/task.schema.json +132 -0
  212. clawbench/data/tools/build_clawbench_lite_enc.py +161 -0
  213. clawbench/doctor.py +171 -0
  214. clawbench/engine.py +180 -0
  215. clawbench/generate_resume_pdf.py +140 -0
  216. clawbench/hf_upload.py +78 -0
  217. clawbench/image.py +127 -0
  218. clawbench/paths.py +150 -0
  219. clawbench/resume_template.json +104 -0
  220. clawbench/run.py +942 -0
  221. clawbench/tui.py +1401 -0
  222. clawbench_cli-0.1.2.dist-info/METADATA +770 -0
  223. clawbench_cli-0.1.2.dist-info/RECORD +226 -0
  224. clawbench_cli-0.1.2.dist-info/WHEEL +4 -0
  225. clawbench_cli-0.1.2.dist-info/entry_points.txt +4 -0
  226. clawbench_cli-0.1.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 7,
5
+ "metaclass": "daily-life",
6
+ "class": "food",
7
+ "description": "On Instacart, purchase all ingredients for 2-day meal plan recipes, with the nearest supermarket auto-selected",
8
+ "sites_involved": [
9
+ "instacart.com"
10
+ ],
11
+ "platform": "instacart",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Instacart, purchase all ingredients for 2-day meal plan recipes, with the nearest supermarket auto-selected",
19
+ "eval_schema": {
20
+ "url_pattern": "__PLACEHOLDER_WILL_NOT_MATCH__",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": [
25
+ {
26
+ "path": "extra_info/meal_plan.json",
27
+ "description": "2-day meal plan with recipes and dietary restrictions"
28
+ }
29
+ ]
30
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 11,
5
+ "metaclass": "daily-life",
6
+ "class": "housing",
7
+ "description": "Search Zillow for a one-bedroom apartment in Toronto downtown under $3500/month, select one and submit a rental application",
8
+ "sites_involved": [
9
+ "zillow.com"
10
+ ],
11
+ "platform": "zillow",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Search Zillow for a one-bedroom apartment in Toronto downtown under $3500/month, select one and submit a rental application",
19
+ "eval_schema": {
20
+ "url_pattern": "zillow\\.com/rentals/api/rcf/v1/rcf",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "listing_title": "Bright 1BR in Toronto Annex, Furnished, July-August Sublet",
3
+ "description": "Spacious furnished one-bedroom apartment in the heart of Toronto's Annex neighborhood. Walking distance to U of T campus, TTC subway, and Bloor Street shops and restaurants. The unit features hardwood floors, in-suite laundry, a fully equipped kitchen, and a sunny south-facing balcony. Perfect for visiting scholars or summer interns.",
4
+ "price": 2400,
5
+ "currency": "CAD",
6
+ "period": "monthly",
7
+ "available_from": "2026-07-01",
8
+ "available_to": "2026-08-31",
9
+ "furnished": true,
10
+ "bedrooms": 1,
11
+ "bathrooms": 1,
12
+ "amenities": [
13
+ "In-suite laundry",
14
+ "Dishwasher",
15
+ "Balcony",
16
+ "Air conditioning",
17
+ "WiFi included"
18
+ ],
19
+ "photos": [
20
+ "living_room.jpg",
21
+ "bedroom.jpg",
22
+ "kitchen.jpg",
23
+ "balcony.jpg"
24
+ ],
25
+ "contact_email": "alex.green.uoft@clawbench.cc"
26
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 15,
5
+ "metaclass": "daily-life",
6
+ "class": "housing",
7
+ "description": "Post a sublet listing on Craigslist: one-bedroom near Toronto Annex, $2400/month, July-August, furnished",
8
+ "sites_involved": [
9
+ "craigslist.org"
10
+ ],
11
+ "platform": "craigslist",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Post a sublet listing on Craigslist: one-bedroom near Toronto Annex, $2400/month, July-August, furnished",
19
+ "eval_schema": {
20
+ "url_pattern": "post\\.craigslist\\.org/k/",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": [
25
+ {
26
+ "path": "extra_info/listing_details.json",
27
+ "description": "Sublet listing details including description, amenities, and photos"
28
+ }
29
+ ]
30
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 35,
5
+ "metaclass": "daily-life",
6
+ "class": "health-medical",
7
+ "description": "Sign up on BetterHelp, book a counseling session, and complete the initial questionnaire",
8
+ "sites_involved": [
9
+ "betterhelp.com"
10
+ ],
11
+ "platform": "betterhelp",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Sign up on BetterHelp, book a counseling session, and complete the initial questionnaire",
19
+ "eval_schema": {
20
+ "url_pattern": "betterhelp\\.com/api/ai-quiz/submit-summary",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 41,
5
+ "metaclass": "daily-life",
6
+ "class": "pets",
7
+ "description": "Book daytime dog walking service on Rover for Monday through Friday next week, large dog (Golden Retriever)",
8
+ "sites_involved": [
9
+ "rover.com"
10
+ ],
11
+ "platform": "rover",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Book daytime dog walking service on Rover for Monday through Friday next week, large dog (Golden Retriever)",
19
+ "eval_schema": {
20
+ "url_pattern": "rover\\.com/api/v7/people/.+/services/dog-walking/contactability",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "Maple",
3
+ "species": "Dog",
4
+ "breed": "Golden Retriever",
5
+ "age_years": 4,
6
+ "weight_lbs": 35,
7
+ "sex": "Female (spayed)",
8
+ "date_of_birth": "2022-03-15",
9
+ "vaccinations_up_to_date": true,
10
+ "microchip_id": "985121012345678",
11
+ "dietary_notes": "Grain-free kibble, sensitive stomach"
12
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 43,
5
+ "metaclass": "daily-life",
6
+ "class": "pets",
7
+ "description": "Book 5-day pet boarding next month on Rover (medium-sized dog), sitter rated 4.8+",
8
+ "sites_involved": [
9
+ "rover.com"
10
+ ],
11
+ "platform": "rover",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Book 5-day pet boarding next month on Rover (medium-sized dog), sitter rated 4.8+",
19
+ "eval_schema": {
20
+ "url_pattern": "rover\\.com/api/v\\d+/people/[^/]+/services/overnight-boarding/contactability",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": [
25
+ {
26
+ "path": "extra_info/pet_info.json",
27
+ "description": "Pet information for Alex Green's dog Maple (Golden Retriever, 4 years, 35 lbs)"
28
+ }
29
+ ]
30
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 45,
5
+ "metaclass": "daily-life",
6
+ "class": "personal-care",
7
+ "description": "Book a women's haircut on Booksy for the upcoming Saturday afternoon",
8
+ "sites_involved": [
9
+ "booksy.com"
10
+ ],
11
+ "platform": "booksy",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Book a women's haircut on Booksy for the upcoming Saturday afternoon",
19
+ "eval_schema": {
20
+ "url_pattern": "booksy\\.com/core/v\\d+/customer_api/me/appointments/business/\\d+/?$",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "note": "Use home address from alex_green_personal_info.json",
3
+ "move out address": "Unit 1208, 664 Spadina Ave, Toronto, ON M5S 2H7, Canada",
4
+ "move in address": "450 Front St W, Toronto, ON M5V 0V7",
5
+ "Task size": "Large",
6
+ "Need Vehicle": "Yes, a Car"
7
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 47,
5
+ "metaclass": "daily-life",
6
+ "class": "personal-care",
7
+ "description": "Find a moving helper on TaskRabbit, next Saturday 9am-1pm, 1 bedroom apartment",
8
+ "sites_involved": [
9
+ "taskrabbit.com"
10
+ ],
11
+ "platform": "taskrabbit",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Find a moving helper on TaskRabbit, next Saturday 9am-1pm, 1 bedroom apartment",
19
+ "eval_schema": {
20
+ "url_pattern": "taskrabbit\\.(com|ca)/(api/v\\d+/jobs|book/\\d+/confirm)",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": [
25
+ {
26
+ "path": "extra_info/address_info.json",
27
+ "description": "Address information (references alex_green_personal_info.json)"
28
+ }
29
+ ]
30
+ }
@@ -0,0 +1,5 @@
1
+ {
2
+ "job_url": "https://boards.greenhouse.io/example/jobs/1234567",
3
+ "job_title": "Senior Software Engineer",
4
+ "company": "Example Corp"
5
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 86,
5
+ "metaclass": "job-search-hr",
6
+ "class": "cv-autofill",
7
+ "description": "Extract information from resume.pdf and fill out the Meta Senior Software Engineer application on Greenhouse",
8
+ "sites_involved": [
9
+ "greenhouse.io"
10
+ ],
11
+ "platform": "greenhouse-meta",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Extract information from resume.pdf and fill out the Meta Senior Software Engineer application on Greenhouse",
19
+ "eval_schema": {
20
+ "url_pattern": "boards-api\\.greenhouse\\.io/v1/boards/.+/jobs/\\d+|job-boards\\.greenhouse\\.io/.+/jobs/\\d+",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": [
25
+ {
26
+ "path": "extra_info/job_links.json",
27
+ "description": "Job posting URL(s) to apply to"
28
+ }
29
+ ]
30
+ }
@@ -0,0 +1,5 @@
1
+ {
2
+ "job_url": "https://boards.greenhouse.io/example/jobs/1234567",
3
+ "job_title": "Senior Software Engineer",
4
+ "company": "Example Corp"
5
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 89,
5
+ "metaclass": "job-search-hr",
6
+ "class": "cv-autofill",
7
+ "description": "Use Simplify Jobs to one-click auto-fill the Amazon Applied Scientist application",
8
+ "sites_involved": [
9
+ "simplify.jobs"
10
+ ],
11
+ "platform": "simplify-jobs",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Use Simplify Jobs to one-click auto-fill the Amazon Applied Scientist application",
19
+ "eval_schema": {
20
+ "url_pattern": "api\\.simplify\\.jobs/v2/candidate/me/application",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": [
25
+ {
26
+ "path": "extra_info/job_links.json",
27
+ "description": "Job posting URL(s) to apply to"
28
+ }
29
+ ]
30
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 91,
5
+ "metaclass": "job-search-hr",
6
+ "class": "job-apply",
7
+ "description": "Search \"Senior Software Engineer\" (Toronto) on Indeed, apply to the top-ranked listing",
8
+ "sites_involved": [
9
+ "indeed.com"
10
+ ],
11
+ "platform": "indeed",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Search \"Senior Software Engineer\" (Toronto) on Indeed, apply to the top-ranked listing",
19
+ "eval_schema": {
20
+ "url_pattern": "smartapply\\.indeed\\.com/beta/indeedapply/resumeapply|apply\\.indeed\\.com/indeedapply/postresumeapply|smartapply\\.indeed\\.com/beta/indeedapply/submit",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 120,
5
+ "metaclass": "office-secretary-tasks",
6
+ "class": "email-mgmt",
7
+ "description": "On Purelymail, send an email to jordan.peters@clawbench.cc with subject \"Meeting Reschedule\"",
8
+ "sites_involved": [
9
+ "purelymail.com"
10
+ ],
11
+ "platform": "purelymail",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "On Purelymail, send an email to jordan.peters@clawbench.cc with subject \"Meeting Reschedule\"",
19
+ "eval_schema": {
20
+ "url_pattern": "inbox\\.purelymail\\.com",
21
+ "method": "POST",
22
+ "body": {
23
+ "_action": "send"
24
+ }
25
+ },
26
+ "time_limit": 30,
27
+ "extra_info": []
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 121,
5
+ "metaclass": "office-secretary-tasks",
6
+ "class": "email-mgmt",
7
+ "description": "Set up Purelymail vacation responder, content: Out of work from June 01 to 15; back on June 16.",
8
+ "sites_involved": [
9
+ "purelymail.com"
10
+ ],
11
+ "platform": "purelymail",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Set up Purelymail vacation responder, content: Out of work from June 01 to 15; back on June 16.",
19
+ "eval_schema": {
20
+ "url_pattern": "inbox\\.purelymail\\.com",
21
+ "method": "POST",
22
+ "body": {
23
+ "_action": "plugin.managesieve-save"
24
+ }
25
+ },
26
+ "time_limit": 30,
27
+ "extra_info": []
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 128,
5
+ "metaclass": "office-secretary-tasks",
6
+ "class": "email-mgmt",
7
+ "description": "Create a Purelymail filter: subject \"invoice\" -> label \"Invoices\" (Move to Invoices mail folder) + flagged",
8
+ "sites_involved": [
9
+ "purelymail.com"
10
+ ],
11
+ "platform": "purelymail",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Create a Purelymail filter: subject \"invoice\" -> label \"Invoices\" (Move to Invoices mail folder) + flagged",
19
+ "eval_schema": {
20
+ "url_pattern": "inbox\\.purelymail\\.com",
21
+ "method": "POST",
22
+ "body": {
23
+ "_action": "plugin.managesieve-save"
24
+ }
25
+ },
26
+ "time_limit": 30,
27
+ "extra_info": []
28
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 134,
5
+ "metaclass": "office-secretary-tasks",
6
+ "class": "calendar",
7
+ "description": "Set up availability on Calendly: Mon-Fri 10-12 and 2-4",
8
+ "sites_involved": [
9
+ "calendly.com"
10
+ ],
11
+ "platform": "calendly",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Set up availability on Calendly: Mon-Fri 10-12 and 2-4",
19
+ "eval_schema": {
20
+ "url_pattern": "calendly\\.com/app/intro/api/availability",
21
+ "method": "PUT"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "participants": [
3
+ {
4
+ "name": "Alex Green",
5
+ "email": "alex.green.uoft@clawbench.cc"
6
+ },
7
+ {
8
+ "name": "Jordan Peters",
9
+ "email": "jordan.peters@pinecresttech.com"
10
+ },
11
+ {
12
+ "name": "Emily Green",
13
+ "email": "emily.green@example.com"
14
+ },
15
+ {
16
+ "name": "Sam Chen",
17
+ "email": "sam.chen@example.com"
18
+ },
19
+ {
20
+ "name": "Taylor Kim",
21
+ "email": "taylor.kim@example.com"
22
+ }
23
+ ],
24
+ "time_options": [
25
+ "2026-04-07 10:00 AM ET",
26
+ "2026-04-08 2:00 PM ET",
27
+ "2026-04-09 11:00 AM ET",
28
+ "2026-04-10 3:00 PM ET"
29
+ ]
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 137,
5
+ "metaclass": "office-secretary-tasks",
6
+ "class": "calendar",
7
+ "description": "Create a poll on Doodle: 5-person meeting with 4 time options (60 mins duration) and send invite link to other four people",
8
+ "sites_involved": [
9
+ "doodle.com"
10
+ ],
11
+ "platform": "doodle",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Create a poll on Doodle: 5-person meeting with 4 time options (60 mins duration) and send invite link to other four people",
19
+ "eval_schema": {
20
+ "url_pattern": "api\\.doodle\\.com/scheduling/scheduling-attempts",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": [
25
+ {
26
+ "path": "extra_info/meeting_details.json",
27
+ "description": "Meeting participants and proposed time options"
28
+ }
29
+ ]
30
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 139,
5
+ "metaclass": "office-secretary-tasks",
6
+ "class": "calendar",
7
+ "description": "Create a Calendly event type \"Office Hours\": 30min, Tue/Thu 2-4pm",
8
+ "sites_involved": [
9
+ "calendly.com"
10
+ ],
11
+ "platform": "calendly",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Create a Calendly event type \"Office Hours\": 30min, Tue/Thu 2-4pm",
19
+ "eval_schema": {
20
+ "url_pattern": "calendly\\.com/app/intro/api/availability",
21
+ "method": "PUT"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": []
25
+ }
@@ -0,0 +1,29 @@
1
+ {
2
+ "tasks": [
3
+ {
4
+ "name": "Define Q3 OKRs",
5
+ "list": "Today"
6
+ },
7
+ {
8
+ "name": "Review architecture RFC",
9
+ "list": "Today"
10
+ },
11
+ {
12
+ "name": "Set up CI/CD pipeline",
13
+ "list": "This Week"
14
+ },
15
+ {
16
+ "name": "Write integration tests",
17
+ "list": "This Week"
18
+ },
19
+ {
20
+ "name": "Prepare sprint demo",
21
+ "list": "This Week"
22
+ }
23
+ ],
24
+ "lists": [
25
+ {"name": "Next Week"},
26
+ {"name": "Next Next Week"},
27
+ {"name": "Next Month"}
28
+ ]
29
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "../task.schema.json",
3
+ "metadata": {
4
+ "task_id": 142,
5
+ "metaclass": "office-secretary-tasks",
6
+ "class": "collab",
7
+ "description": "Create a Trello board \"Q3 Sprint Planning\" with 3 lists + 5 cards",
8
+ "sites_involved": [
9
+ "trello.com"
10
+ ],
11
+ "platform": "trello",
12
+ "common_info": {
13
+ "email_credentials": "credentials to use the assigned disposable email account",
14
+ "user_info": "alex_green_personal_info.json; the dummy user's personal information",
15
+ "user_resume": "PDF resume with disposable email account injected"
16
+ }
17
+ },
18
+ "instruction": "Create a Trello board \"Q3 Sprint Planning\" with 3 lists + 5 cards",
19
+ "eval_schema": {
20
+ "url_pattern": "trello\\.com/1/boards",
21
+ "method": "POST"
22
+ },
23
+ "time_limit": 30,
24
+ "extra_info": [
25
+ {
26
+ "path": "extra_info/task_list.json",
27
+ "description": "Task names with the list each belongs to, plus additional list names for the board"
28
+ }
29
+ ]
30
+ }