clawbench-cli 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. clawbench/__init__.py +35 -0
  2. clawbench/__main__.py +8 -0
  3. clawbench/batch.py +619 -0
  4. clawbench/cli.py +397 -0
  5. clawbench/data/chrome-extension/README.md +127 -0
  6. clawbench/data/chrome-extension/background.js +50 -0
  7. clawbench/data/chrome-extension/content.js +70 -0
  8. clawbench/data/chrome-extension/manifest.json +25 -0
  9. clawbench/data/chrome-extension/setup.sh +27 -0
  10. clawbench/data/chrome-extension/stealth.js +200 -0
  11. clawbench/data/docker/Dockerfile +51 -0
  12. clawbench/data/docker/entrypoint.sh +394 -0
  13. clawbench/data/docker/setup-openclaw.sh +112 -0
  14. clawbench/data/eval/README.md +95 -0
  15. clawbench/data/eval/agentic_eval.md +53 -0
  16. clawbench/data/extension-server/.python-version +1 -0
  17. clawbench/data/extension-server/README.md +54 -0
  18. clawbench/data/extension-server/pyproject.toml +7 -0
  19. clawbench/data/extension-server/server.py +360 -0
  20. clawbench/data/extension-server/uv.lock +644 -0
  21. clawbench/data/models/model.schema.json +44 -0
  22. clawbench/data/models/models.example.yaml +16 -0
  23. clawbench/data/shared/alex_green_personal_info.json +451 -0
  24. clawbench/data/test-cases/001-daily-life-food-uber-eats/task.json +25 -0
  25. clawbench/data/test-cases/002-daily-life-food-doordash/task.json +25 -0
  26. clawbench/data/test-cases/004-daily-life-food-instacart/extra_info/grocery_list.json +36 -0
  27. clawbench/data/test-cases/004-daily-life-food-instacart/task.json +30 -0
  28. clawbench/data/test-cases/006-daily-life-food-uber-eats/task.json +24 -0
  29. clawbench/data/test-cases/007-daily-life-food-instacart/extra_info/meal_plan.json +21 -0
  30. clawbench/data/test-cases/007-daily-life-food-instacart/task.json +30 -0
  31. clawbench/data/test-cases/011-daily-life-housing-zillow/task.json +25 -0
  32. clawbench/data/test-cases/015-daily-life-housing-craigslist/extra_info/listing_details.json +26 -0
  33. clawbench/data/test-cases/015-daily-life-housing-craigslist/task.json +30 -0
  34. clawbench/data/test-cases/035-daily-life-health-medical-betterhelp/task.json +25 -0
  35. clawbench/data/test-cases/041-daily-life-pets-rover/task.json +25 -0
  36. clawbench/data/test-cases/043-daily-life-pets-rover/extra_info/pet_info.json +12 -0
  37. clawbench/data/test-cases/043-daily-life-pets-rover/task.json +30 -0
  38. clawbench/data/test-cases/045-daily-life-personal-care-booksy/task.json +25 -0
  39. clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/extra_info/address_info.json +7 -0
  40. clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/task.json +30 -0
  41. clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/extra_info/job_links.json +5 -0
  42. clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/task.json +30 -0
  43. clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/extra_info/job_links.json +5 -0
  44. clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/task.json +30 -0
  45. clawbench/data/test-cases/091-job-search-hr-job-apply-indeed/task.json +25 -0
  46. clawbench/data/test-cases/120-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  47. clawbench/data/test-cases/121-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  48. clawbench/data/test-cases/128-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  49. clawbench/data/test-cases/134-office-secretary-tasks-calendar-calendly/task.json +25 -0
  50. clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/extra_info/meeting_details.json +30 -0
  51. clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/task.json +30 -0
  52. clawbench/data/test-cases/139-office-secretary-tasks-calendar-calendly/task.json +25 -0
  53. clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/extra_info/task_list.json +29 -0
  54. clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/task.json +30 -0
  55. clawbench/data/test-cases/179-dev-tech-github-ops-github/extra_info/config.json +13 -0
  56. clawbench/data/test-cases/179-dev-tech-github-ops-github/task.json +30 -0
  57. clawbench/data/test-cases/180-dev-tech-github-ops-github/task.json +25 -0
  58. clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/extra_info/raw_results.json +47 -0
  59. clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/task.json +30 -0
  60. clawbench/data/test-cases/242-academia-research-research-tools-overleaf/task.json +25 -0
  61. clawbench/data/test-cases/246-academia-research-research-tools-zotero/task.json +25 -0
  62. clawbench/data/test-cases/247-academia-research-research-tools-semantic-scholar/task.json +25 -0
  63. clawbench/data/test-cases/265-education-learning-general-coursera/task.json +25 -0
  64. clawbench/data/test-cases/266-education-learning-general-leetcode/extra_info/solution_code.py +9 -0
  65. clawbench/data/test-cases/266-education-learning-general-leetcode/task.json +30 -0
  66. clawbench/data/test-cases/273-education-learning-general-edx/task.json +25 -0
  67. clawbench/data/test-cases/274-education-learning-general-udemy/task.json +25 -0
  68. clawbench/data/test-cases/279-travel-general-airbnb/task.json +25 -0
  69. clawbench/data/test-cases/280-travel-general-booking-com/task.json +25 -0
  70. clawbench/data/test-cases/363-entertainment-hobbies-general-ticketmaster/task.json +25 -0
  71. clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/extra_info/book_list.json +14 -0
  72. clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/task.json +30 -0
  73. clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/extra_info/event_details.json +10 -0
  74. clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/task.json +30 -0
  75. clawbench/data/test-cases/403-personal-management-account-security-1password-web/extra_info/credentials.json +34 -0
  76. clawbench/data/test-cases/403-personal-management-account-security-1password-web/task.json +30 -0
  77. clawbench/data/test-cases/413-personal-management-personal-tools-todoist/extra_info/task_list.json +52 -0
  78. clawbench/data/test-cases/413-personal-management-personal-tools-todoist/task.json +30 -0
  79. clawbench/data/test-cases/468-rating-voting-general-glassdoor/extra_info/interview_experience.json +10 -0
  80. clawbench/data/test-cases/468-rating-voting-general-glassdoor/task.json +30 -0
  81. clawbench/data/test-cases/469-rating-voting-general-tripadvisor/extra_info/review_content.json +6 -0
  82. clawbench/data/test-cases/469-rating-voting-general-tripadvisor/task.json +30 -0
  83. clawbench/data/test-cases/470-rating-voting-general-trustpilot/extra_info/review_content.json +6 -0
  84. clawbench/data/test-cases/470-rating-voting-general-trustpilot/task.json +30 -0
  85. clawbench/data/test-cases/474-rating-voting-general-capterra/task.json +25 -0
  86. clawbench/data/test-cases/475-rating-voting-general-g2/task.json +25 -0
  87. clawbench/data/test-cases/482-creation-init-general-confluence/extra_info/content.json +3 -0
  88. clawbench/data/test-cases/482-creation-init-general-confluence/task.json +30 -0
  89. clawbench/data/test-cases/483-creation-init-general-airtable/task.json +25 -0
  90. clawbench/data/test-cases/484-creation-init-general-clickup/task.json +28 -0
  91. clawbench/data/test-cases/485-creation-init-general-webflow/task.json +25 -0
  92. clawbench/data/test-cases/486-creation-init-general-mailchimp/extra_info/content.json +3 -0
  93. clawbench/data/test-cases/486-creation-init-general-mailchimp/task.json +30 -0
  94. clawbench/data/test-cases/487-creation-init-general-typeform/extra_info/survey_questions.json +85 -0
  95. clawbench/data/test-cases/487-creation-init-general-typeform/task.json +30 -0
  96. clawbench/data/test-cases/488-creation-init-general-substack/extra_info/content.json +3 -0
  97. clawbench/data/test-cases/488-creation-init-general-substack/task.json +30 -0
  98. clawbench/data/test-cases/489-creation-init-general-ghost/extra_info/content.json +3 -0
  99. clawbench/data/test-cases/489-creation-init-general-ghost/task.json +30 -0
  100. clawbench/data/test-cases/501-creation-init-general-asana/extra_info/project_description.json +8 -0
  101. clawbench/data/test-cases/501-creation-init-general-asana/task.json +33 -0
  102. clawbench/data/test-cases/529-daily-life-shopping-delivery-king-arthur-baking/task.json +25 -0
  103. clawbench/data/test-cases/533-daily-life-utilities-inmyarea/task.json +25 -0
  104. clawbench/data/test-cases/535-daily-life-home-home-depot/task.json +25 -0
  105. clawbench/data/test-cases/537-daily-life-food-crumbl/task.json +25 -0
  106. clawbench/data/test-cases/539-daily-life-health-jefit/task.json +25 -0
  107. clawbench/data/test-cases/542-daily-life-pets-wag/task.json +25 -0
  108. clawbench/data/test-cases/551-finance-investment-crypto-wallet-trezor/task.json +25 -0
  109. clawbench/data/test-cases/552-finance-investment-business-payment-plooto/task.json +25 -0
  110. clawbench/data/test-cases/555-finance-investment-insurance-insureon/task.json +25 -0
  111. clawbench/data/test-cases/559-finance-investment-crowdfunding-frontfundr/task.json +25 -0
  112. clawbench/data/test-cases/564-daily-life-event-registration-race-roster/task.json +25 -0
  113. clawbench/data/test-cases/565-job-search-hr-job-search-jopwell/task.json +25 -0
  114. clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/extra_info/listing_details.json +26 -0
  115. clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/task.json +30 -0
  116. clawbench/data/test-cases/569-job-search-hr-job-search-careerbuilder/task.json +25 -0
  117. clawbench/data/test-cases/570-job-search-hr-job-search-hired/task.json +25 -0
  118. clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/extra_info/listing_details.json +26 -0
  119. clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/task.json +30 -0
  120. clawbench/data/test-cases/576-office-secretary-tasks-reports-ftc-reportfraud/task.json +25 -0
  121. clawbench/data/test-cases/583-office-secretary-tasks-support-tickets-freshdesk/task.json +25 -0
  122. clawbench/data/test-cases/598-academia-research-legal-docs-formswift/task.json +25 -0
  123. clawbench/data/test-cases/606-education-learning-kids-courses-outschool/task.json +25 -0
  124. clawbench/data/test-cases/607-education-learning-art-courses-creativebug/task.json +25 -0
  125. clawbench/data/test-cases/609-education-learning-meditation-spirit-rock-meditation-center/task.json +25 -0
  126. clawbench/data/test-cases/615-travel-flights-spirit-airlines/task.json +25 -0
  127. clawbench/data/test-cases/618-travel-train-bus-12go-asia/task.json +25 -0
  128. clawbench/data/test-cases/625-travel-camping-outdoor-parks-canada-reservations/task.json +25 -0
  129. clawbench/data/test-cases/626-travel-bus-flixbus/task.json +25 -0
  130. clawbench/data/test-cases/627-travel-flights-momondo/task.json +25 -0
  131. clawbench/data/test-cases/632-shopping-commerce-beauty-care-olaplex/task.json +25 -0
  132. clawbench/data/test-cases/634-shopping-commerce-apparel-dooney-bourke/task.json +25 -0
  133. clawbench/data/test-cases/635-shopping-commerce-gifts-uncommon-goods/task.json +25 -0
  134. clawbench/data/test-cases/636-shopping-commerce-auto-parts-rockauto/task.json +25 -0
  135. clawbench/data/test-cases/638-shopping-commerce-print-custom-vistaprint/task.json +25 -0
  136. clawbench/data/test-cases/639-shopping-commerce-luxury-mansur-gavriel/task.json +25 -0
  137. clawbench/data/test-cases/671-entertainment-gaming-humble-bundle/task.json +25 -0
  138. clawbench/data/test-cases/672-entertainment-hobbies-anime-streaming-crunchyroll/task.json +25 -0
  139. clawbench/data/test-cases/674-entertainment-hobbies-masterclass-masterclass/task.json +25 -0
  140. clawbench/data/test-cases/676-government-civic-legal-docs-legalnature/task.json +25 -0
  141. clawbench/data/test-cases/685-personal-management-budget-mgmt-everydollar/task.json +25 -0
  142. clawbench/data/test-cases/687-personal-management-vpn-subscription-ipvanish/task.json +25 -0
  143. clawbench/data/test-cases/688-personal-management-insurance-compare-insurify/task.json +25 -0
  144. clawbench/data/test-cases/695-automation-workflows-recurring-order-stumptown-coffee/task.json +25 -0
  145. clawbench/data/test-cases/697-automation-workflows-recurring-order-bean-box/task.json +25 -0
  146. clawbench/data/test-cases/699-automation-workflows-recurring-order-mistobox/task.json +25 -0
  147. clawbench/data/test-cases/700-deletion-revocation-data-deletion-deleteme/task.json +25 -0
  148. clawbench/data/test-cases/705-rating-voting-wine-review-vivino/task.json +25 -0
  149. clawbench/data/test-cases/706-rating-voting-beer-review-beeradvocate/task.json +25 -0
  150. clawbench/data/test-cases/707-rating-voting-social-wine-untappd/task.json +25 -0
  151. clawbench/data/test-cases/708-rating-voting-professor-review-ratemyprofessors/task.json +28 -0
  152. clawbench/data/test-cases/709-rating-voting-service-review-angi/task.json +25 -0
  153. clawbench/data/test-cases/710-creation-init-interior-design-roomsketcher/task.json +25 -0
  154. clawbench/data/test-cases/711-creation-init-color-design-coolors/task.json +25 -0
  155. clawbench/data/test-cases/712-creation-init-website-create-squarespace/task.json +25 -0
  156. clawbench/data/test-cases/713-creation-init-website-build-wix/task.json +25 -0
  157. clawbench/data/test-cases/735-home-services-maintenance-house-cleaning-bark/task.json +25 -0
  158. clawbench/data/test-cases/736-home-services-maintenance-plumbing-ace-hardware/task.json +25 -0
  159. clawbench/data/test-cases/737-home-services-maintenance-kitchen-remodel-lowes/task.json +25 -0
  160. clawbench/data/test-cases/738-home-services-maintenance-equipment-install-amazon-home-services/task.json +25 -0
  161. clawbench/data/test-cases/750-automotive-vehicle-services-car-insurance-compare-kanetix/task.json +25 -0
  162. clawbench/data/test-cases/751-automotive-vehicle-services-car-lease-sixt/task.json +25 -0
  163. clawbench/data/test-cases/754-automotive-vehicle-services-used-car-listing-autotrader/task.json +25 -0
  164. clawbench/data/test-cases/763-automotive-vehicle-services-car-lease-autoslash/task.json +25 -0
  165. clawbench/data/test-cases/766-nonprofit-charity-donation-doctors-without-borders-msf/task.json +25 -0
  166. clawbench/data/test-cases/768-nonprofit-charity-community-crowdfund-ioby/task.json +25 -0
  167. clawbench/data/test-cases/770-nonprofit-charity-volunteer-apply-on-make-a-wish-foundation-website-complete-and-submit-a-volunteer-application-form-selecting-the-wish-granter-role-and-entering-city-phoenix-az/task.json +25 -0
  168. clawbench/data/test-cases/774-nonprofit-charity-nonprofit-job-apply-charity-village/task.json +25 -0
  169. clawbench/data/test-cases/776-nonprofit-charity-volunteer-signup-idealist/task.json +25 -0
  170. clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/extra_info/payment_info.json +3 -0
  171. clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/task.json +30 -0
  172. clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/extra_info/address_info.json +4 -0
  173. clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/task.json +30 -0
  174. clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/extra_info/email_info.json +3 -0
  175. clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/task.json +30 -0
  176. clawbench/data/test-cases/782-beauty-personal-care-skincare-purchase-paulas-choice/task.json +24 -0
  177. clawbench/data/test-cases/783-beauty-personal-care-beauty-booking-ulta-beauty/task.json +24 -0
  178. clawbench/data/test-cases/785-beauty-personal-care-skincare-curology/task.json +25 -0
  179. clawbench/data/test-cases/788-beauty-personal-care-makeup-the-ordinary/task.json +25 -0
  180. clawbench/data/test-cases/789-beauty-personal-care-makeup-fenty-beauty/task.json +25 -0
  181. clawbench/data/test-cases/793-beauty-personal-care-beauty-retail-mac-cosmetics/task.json +25 -0
  182. clawbench/data/test-cases/794-beauty-personal-care-salon-booking-styleseat/task.json +25 -0
  183. clawbench/data/test-cases/795-pet-animal-care-pet-adoption-aspca/task.json +25 -0
  184. clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/extra_info/pet_info.json +12 -0
  185. clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/task.json +30 -0
  186. clawbench/data/test-cases/799-pet-animal-care-pet-insurance-aspca-pet-health-insurance/task.json +25 -0
  187. clawbench/data/test-cases/801-pet-animal-care-pet-friendly-travel-bringfido/task.json +25 -0
  188. clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/extra_info/pet_info.json +12 -0
  189. clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/task.json +30 -0
  190. clawbench/data/test-cases/807-pet-animal-care-pet-dna-embark/task.json +25 -0
  191. clawbench/data/test-cases/809-pet-animal-care-pet-adopt-petfinder/task.json +28 -0
  192. clawbench/data/test-cases/812-pet-animal-care-pet-subscription-ollie/task.json +25 -0
  193. clawbench/data/test-cases/815-personal-management-records-mgmt-myheritage/task.json +25 -0
  194. clawbench/data/test-cases/821-education-learning-reading-self-study-blinkist/task.json +25 -0
  195. clawbench/data/test-cases/861-entertainment-hobbies-movies-cineplex/task.json +25 -0
  196. clawbench/data/test-cases/862-entertainment-hobbies-movies-amc-theatres/task.json +25 -0
  197. clawbench/data/test-cases/864-entertainment-hobbies-show-tickets-ticketmaster/task.json +25 -0
  198. clawbench/data/test-cases/865-travel-outdoor-hipcamp/task.json +25 -0
  199. clawbench/data/test-cases/867-entertainment-hobbies-movies-fandango/task.json +25 -0
  200. clawbench/data/test-cases/872-daily-life-food-opentable/task.json +25 -0
  201. clawbench/data/test-cases/873-daily-life-food-resy/task.json +28 -0
  202. clawbench/data/test-cases/876-entertainment-hobbies-show-tickets-vivid-seats/task.json +25 -0
  203. clawbench/data/test-cases/877-entertainment-hobbies-show-tickets-stubhub/task.json +25 -0
  204. clawbench/data/test-cases/878-travel-outdoor-ontario-parks/task.json +25 -0
  205. clawbench/data/test-cases/883-education-learning-hobby-class-sur-la-table/task.json +25 -0
  206. clawbench/data/test-cases/884-entertainment-hobbies-experience-breakout-games/task.json +25 -0
  207. clawbench/data/test-cases/885-entertainment-hobbies-experience-bowlero/task.json +25 -0
  208. clawbench/data/test-cases/886-entertainment-hobbies-experience-topgolf/task.json +25 -0
  209. clawbench/data/test-cases/lite.json +226 -0
  210. clawbench/data/test-cases/lite.schema.json +105 -0
  211. clawbench/data/test-cases/task.schema.json +132 -0
  212. clawbench/data/tools/build_clawbench_lite_enc.py +161 -0
  213. clawbench/doctor.py +171 -0
  214. clawbench/engine.py +180 -0
  215. clawbench/generate_resume_pdf.py +140 -0
  216. clawbench/hf_upload.py +78 -0
  217. clawbench/image.py +127 -0
  218. clawbench/paths.py +150 -0
  219. clawbench/resume_template.json +104 -0
  220. clawbench/run.py +942 -0
  221. clawbench/tui.py +1401 -0
  222. clawbench_cli-0.1.2.dist-info/METADATA +770 -0
  223. clawbench_cli-0.1.2.dist-info/RECORD +226 -0
  224. clawbench_cli-0.1.2.dist-info/WHEEL +4 -0
  225. clawbench_cli-0.1.2.dist-info/entry_points.txt +4 -0
  226. clawbench_cli-0.1.2.dist-info/licenses/LICENSE +201 -0
clawbench/cli.py ADDED
@@ -0,0 +1,397 @@
1
+ """``claw-bench`` command-line entry point (click-based).
2
+
3
+ Design notes:
4
+
5
+ - Bare ``claw-bench`` launches the TUI. This preserves muscle memory from
6
+ the old ``./run.sh`` and keeps the zero-friction experience for users
7
+ who just typed ``pip install claw-bench`` and hit enter.
8
+ - Every power-user action has an explicit subcommand so scripts don't
9
+ need to navigate a menu (``run``, ``batch``, ``build``, ``cases``,
10
+ ``models``, ``configure``, ``doctor``, ``version``).
11
+ - Subcommands are thin wrappers that delegate to the module-level
12
+ ``main()`` functions in :mod:`clawbench.run` / :mod:`clawbench.batch`.
13
+ Those modules still accept argparse argv so they can be invoked
14
+ in-process *and* via ``python -m clawbench run ...`` from the batch
15
+ runner's subprocess fan-out — one code path, two callers.
16
+
17
+ Subcommand surface intentionally kept small. Every flag the TUI exposes
18
+ is reachable from the CLI, but we don't duplicate every internal toggle
19
+ that ``run.py``/``batch.py`` support as argparse args — click just
20
+ forwards through to them via ``extra_args``.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import os
26
+ import stat
27
+ import subprocess
28
+ import sys
29
+ from pathlib import Path
30
+
31
+ import click
32
+
33
+ from clawbench import __version__
34
+ from clawbench import doctor as _doctor
35
+ from clawbench import paths as _paths
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Small helpers
40
+ # ---------------------------------------------------------------------------
41
+
42
def _echo_result(r: _doctor.CheckResult) -> None:
    """Print one doctor check as a colored status line, followed by its hint.

    The hint (if any) is only shown for results whose status is not "ok",
    one dimmed line per line of hint text.
    """
    # status -> (badge text, badge color); unknown statuses get a neutral badge.
    badges = {
        "ok": ("[OK] ", "green"),
        "warn": ("[WARN]", "yellow"),
        "fail": ("[FAIL]", "red"),
    }
    badge, tint = badges.get(r.status, ("[?]", "white"))
    click.echo(f" {click.style(badge, fg=tint)} {r.name}: {r.detail}")

    if r.status == "ok" or not r.hint:
        return
    for hint_line in r.hint.splitlines():
        click.echo(f" {click.style(hint_line, dim=True)}")
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Root group
54
+ # ---------------------------------------------------------------------------
55
+
56
@click.group(
    invoke_without_command=True,
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.version_option(__version__, "-V", "--version", prog_name="claw-bench")
@click.pass_context
def main(ctx: click.Context) -> None:
    """ClawBench — benchmark AI agents on 153 everyday web tasks.

    Run without a subcommand to launch the interactive TUI. Use
    ``claw-bench run``, ``batch``, ``build``, ``doctor``, etc. for
    scripting.
    """
    if ctx.invoked_subcommand is not None:
        # A subcommand was named on the command line; nothing to do here.
        return
    # Bare ``claw-bench``: start the TUI. The import lives inside the
    # function rather than at module level.
    from clawbench import tui
    tui.main()
73
+
74
+
75
+ # ---------------------------------------------------------------------------
76
+ # tui
77
+ # ---------------------------------------------------------------------------
78
+
79
@main.command("tui")
def tui_cmd() -> None:
    """Launch the interactive TUI (default action if no subcommand given)."""
    # Aliased so the local name does not shadow this module's ``main`` group.
    from clawbench.tui import main as _launch_tui
    _launch_tui()
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # run
88
+ # ---------------------------------------------------------------------------
89
+
90
@main.command(
    "run",
    context_settings={
        "ignore_unknown_options": True,
        "allow_extra_args": True,
        "help_option_names": ["-h", "--help"],
    },
)
@click.argument("test_case_dir", type=click.Path(path_type=Path))
@click.argument("model", required=False)
@click.option("--human", is_flag=True, help="Human mode: expose Chrome via noVNC instead of running an agent.")
@click.option("--output-dir", type=click.Path(path_type=Path), default=None,
              help="Directory to write output to (default: ./claw-output).")
@click.option("--no-build", is_flag=True, help="Skip building the container image.")
@click.option("--no-upload", is_flag=True, help="Skip HuggingFace upload even if configured.")
@click.pass_context
def run_cmd(
    ctx: click.Context,
    test_case_dir: Path,
    model: str | None,
    human: bool,
    output_dir: Path | None,
    no_build: bool,
    no_upload: bool,
) -> None:
    """Run a single test case against a model (or in --human mode)."""
    from clawbench import run as _run

    # The case argument may be:
    #   (a) a path that already exists (the user's own case directory),
    #   (b) ``test-cases/<name>`` relative to the project (dev convenience),
    #   (c) a bare case name such as ``006-daily-life-food-uber-eats``.
    # When the path does not exist as given, its final component is looked
    # up among the bundled test cases; if that also misses, the original
    # value is forwarded unchanged.
    case_path = test_case_dir
    if not case_path.exists():
        candidate = _paths.test_cases_dir() / test_case_dir.name
        if candidate.exists():
            case_path = candidate

    # Rebuild the argv expected by ``clawbench.run.main``.
    forwarded: list[str] = [str(case_path)]
    if model:
        forwarded.append(model)
    if human:
        forwarded.append("--human")
    if output_dir:
        forwarded.extend(("--output-dir", str(output_dir)))
    forwarded.extend(
        flag
        for flag, enabled in (("--no-build", no_build), ("--no-upload", no_upload))
        if enabled
    )
    # Anything click did not recognise is passed through untouched.
    forwarded.extend(ctx.args)
    _run.main(forwarded)
141
+
142
+
143
+ # ---------------------------------------------------------------------------
144
+ # batch
145
+ # ---------------------------------------------------------------------------
146
+
147
@main.command(
    "batch",
    context_settings={
        "ignore_unknown_options": True,
        "allow_extra_args": True,
        "help_option_names": ["-h", "--help"],
    },
)
@click.option("--models", "models_", multiple=True, help="Glob(s) matching model keys in models.yaml.")
@click.option("--all-models", is_flag=True, help="Run every model in models.yaml.")
@click.option("--cases", "cases_", multiple=True, help="Glob(s) matching test-case dirs.")
@click.option("--all-cases", is_flag=True, help="Run every bundled test case.")
@click.option("--case-range", default=None, help="Numeric ID range, e.g. 1-50.")
@click.option("--max-concurrent", type=int, default=2, help="Max parallel jobs (default: 2).")
@click.option("--output-dir", type=click.Path(path_type=Path), default=None,
              help="Base output directory (default: ./claw-output).")
@click.option("--stagger-delay", type=float, default=15,
              help="Min seconds between container starts (default: 15).")
@click.option("--dry-run", is_flag=True, help="Print job matrix without running.")
@click.option("--no-upload", is_flag=True, help="Skip HuggingFace upload for all runs.")
@click.pass_context
def batch_cmd(
    ctx: click.Context,
    models_: tuple[str, ...],
    all_models: bool,
    cases_: tuple[str, ...],
    all_cases: bool,
    case_range: str | None,
    max_concurrent: int,
    output_dir: Path | None,
    stagger_delay: float,
    dry_run: bool,
    no_upload: bool,
) -> None:
    """Run a model x case cross-product concurrently."""
    from clawbench import batch as _batch

    forwarded: list[str] = []

    def _add(*tokens: str) -> None:
        # Append tokens to the argv handed to ``clawbench.batch.main``.
        forwarded.extend(tokens)

    if models_:
        _add("--models", *models_)
    if all_models:
        _add("--all-models")
    if cases_:
        _add("--cases", *cases_)
    if all_cases:
        _add("--all-cases")
    if case_range:
        _add("--case-range", case_range)
    # These two are always forwarded, including when left at their defaults.
    _add("--max-concurrent", str(max_concurrent))
    if output_dir:
        _add("--output-dir", str(output_dir))
    _add("--stagger-delay", str(stagger_delay))
    if dry_run:
        _add("--dry-run")
    if no_upload:
        _add("--no-upload")
    # Options click did not recognise are passed through as-is.
    _add(*ctx.args)
    _batch.main(forwarded)
204
+
205
+
206
+ # ---------------------------------------------------------------------------
207
+ # build
208
+ # ---------------------------------------------------------------------------
209
+
210
@main.command("build")
@click.option("--no-cache", is_flag=True, help="Ignore layer cache — full rebuild.")
def build_cmd(no_cache: bool) -> None:
    """Build the clawbench container image from the bundled Dockerfile."""
    from clawbench import run as _run

    # For an explicit cold build, the existing ``clawbench`` image is
    # force-removed first and the normal build is then run. The removal's
    # output is captured and its exit status is not checked, so a missing
    # image is not an error. If no engine is detected, removal is skipped.
    engine = None
    if no_cache:
        from clawbench.engine import detect_engine
        engine = detect_engine()
    if engine:
        subprocess.run(
            [engine, "image", "rm", "-f", "clawbench"],
            capture_output=True,
        )
    _run.docker_build()
225
+
226
+
227
+ # ---------------------------------------------------------------------------
228
+ # cases
229
+ # ---------------------------------------------------------------------------
230
+
231
@main.command("cases")
@click.option("--category", default=None, help="Filter by category (substring match).")
def cases_cmd(category: str | None) -> None:
    """List bundled test cases (name, category, time-limit)."""
    import json as _json

    base = _paths.test_cases_dir()
    dirs = sorted(p.parent for p in base.glob("*/task.json"))
    if not dirs:
        click.echo("No test cases found.")
        sys.exit(1)
    # One outlier case has a 180+ char name; cap padding at 60 so the
    # common case doesn't get a wall of whitespace.
    width = min(60, max(len(d.name) for d in dirs))
    # Lower-case the filter once instead of on every iteration.
    needle = category.lower() if category else None
    shown = 0
    for d in dirs:
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            task = _json.loads((d / "task.json").read_text(encoding="utf-8"))
            if not isinstance(task, dict):
                # Report a non-object file as unreadable instead of letting
                # ``.get`` crash the whole listing below.
                raise ValueError("top-level JSON value is not an object")
        except Exception as e:
            click.echo(f" {d.name:<{width}} [unreadable: {e}]")
            continue
        # Coerce to str so a null or non-string category cannot break the
        # substring filter or the column formatting.
        cat = str(task.get("category") or "?")
        if needle and needle not in cat.lower():
            continue
        time_limit = task.get("time_limit", "?")
        click.echo(f" {d.name:<{width}} {cat:<20} {time_limit} min")
        shown += 1
    click.echo(f"\n{shown} case(s)")
258
+
259
+
260
+ # ---------------------------------------------------------------------------
261
+ # models
262
+ # ---------------------------------------------------------------------------
263
+
264
@main.command("models")
def models_cmd() -> None:
    """List configured models from the user's models.yaml."""
    import yaml as _yaml

    path = _paths.user_models_yaml()
    try:
        # Read as UTF-8 explicitly rather than with the locale encoding.
        data = _yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    except Exception as e:
        click.echo(f"ERROR: cannot read {path}: {e}", err=True)
        sys.exit(1)
    if not isinstance(data, dict):
        # A top-level list or scalar would otherwise crash the loop below
        # with a traceback; report it as a configuration error instead.
        click.echo(
            f"ERROR: {path} must contain a mapping of model names, "
            f"got {type(data).__name__}",
            err=True,
        )
        sys.exit(1)
    if not data:
        click.echo(f"No models configured. Edit {path} or run `claw-bench configure`.")
        return
    click.echo(f"Models configured in {path}:")
    # key=str keeps sorting well-defined if YAML produced non-string keys.
    for name in sorted(data, key=str):
        entry = data[name]
        # "?" covers both a non-mapping entry and a missing api_type.
        api = (entry.get("api_type") or "?") if isinstance(entry, dict) else "?"
        click.echo(f" {name} ({api})")
281
+
282
+
283
+ # ---------------------------------------------------------------------------
284
+ # configure
285
+ # ---------------------------------------------------------------------------
286
+
287
@main.command("configure")
@click.option("--show", is_flag=True, help="Print the config file path and exit.")
@click.option("--secrets", is_flag=True, help="Write a secrets.env file (chmod 600) interactively.")
def configure_cmd(show: bool, secrets: bool) -> None:
    """Open the user's models.yaml in $EDITOR, or manage secrets."""
    import shlex

    if show and secrets:
        click.echo("ERROR: pass --show OR --secrets, not both", err=True)
        sys.exit(1)
    if show:
        click.echo(f"models.yaml: {_paths.user_models_yaml()}")
        click.echo(f"config.json: {_paths.user_config_json()}")
        click.echo(f"secrets.env: {_paths.user_secrets_path()}")
        return
    if secrets:
        _write_secrets_interactive()
        return
    # Default: $EDITOR on models.yaml (seeds it first if missing).
    path = _paths.user_models_yaml()
    editor = os.environ.get("EDITOR") or os.environ.get("VISUAL") or "vi"
    # $EDITOR often carries arguments ("code -w", "emacsclient -t"). Split
    # it into argv on POSIX so the executable is found. On Windows, or if
    # the value cannot be tokenised, treat the whole string as the
    # executable, as before.
    editor_argv = [editor]
    if os.name != "nt":
        try:
            editor_argv = shlex.split(editor) or [editor]
        except ValueError:
            editor_argv = [editor]
    click.echo(f"Opening {path} with {editor}...")
    try:
        subprocess.run([*editor_argv, str(path)], check=False)
    except FileNotFoundError:
        click.echo(f"ERROR: editor '{editor}' not found. Set $EDITOR.", err=True)
        sys.exit(1)
312
+
313
+
314
def _write_secrets_interactive() -> None:
    """Prompt for PurelyMail + optional HF keys and persist to secrets.env.

    Existing ``KEY=value`` lines in the file are loaded first; comments,
    blank lines and lines without ``=`` are ignored. A blank answer keeps
    the current value, so a previously-persisted key is never overwritten
    with "". Keys present in the file but not prompted for are preserved.

    The file is opened with owner-only permissions (0600) and chmod'ed
    before any secret is written, so the contents are never on disk under
    looser permissions. The parent directory is expected to be created via
    :func:`_paths.user_config_dir` (mkdir with ``exist_ok=True``).
    """
    target = _paths.user_secrets_path()
    existing: dict[str, str] = {}
    if target.exists():
        # Read with the same encoding the file is written with below.
        for line in target.read_text(encoding="utf-8").splitlines():
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            k, v = line.split("=", 1)
            existing[k.strip()] = v.strip().strip('"').strip("'")
    click.echo(f"Writing to {target}")
    click.echo("Leave blank to keep the current value (or skip if unset).\n")

    keys = [
        ("PURELY_MAIL_API_KEY", "PurelyMail API key"),
        ("PURELY_MAIL_DOMAIN", "PurelyMail domain"),
        ("HF_TOKEN", "HuggingFace token (optional)"),
        ("HF_REPO_ID", "HuggingFace dataset repo id (optional)"),
    ]
    updated: dict[str, str] = dict(existing)
    for key, label in keys:
        cur = existing.get(key, "")
        hint = f" [current: {_redact(cur)}]" if cur else ""
        val = click.prompt(f" {label}{hint}", default="", show_default=False).strip()
        if val:
            updated[key] = val

    lines = ["# claw-bench secrets — chmod 600", ""]
    lines += [f'{k}="{v}"' for k, v in updated.items()]

    owner_rw = stat.S_IRUSR | stat.S_IWUSR
    # Create (or truncate) with 0600 up front. The mode argument only
    # applies to a newly created file, so also chmod while the file is
    # still empty to tighten a pre-existing file before secrets land in it.
    fd = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_rw)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        try:
            target.chmod(owner_rw)
        except OSError:
            pass  # windows / non-posix — best effort
        fh.write("\n".join(lines) + "\n")
    click.echo(f"\nWrote {len(updated)} key(s) to {target}")
355
+
356
+
357
+ def _redact(v: str) -> str:
358
+ if len(v) <= 4:
359
+ return "****"
360
+ return v[:2] + "****" + v[-2:]
361
+
362
+
363
+ # ---------------------------------------------------------------------------
364
+ # doctor
365
+ # ---------------------------------------------------------------------------
366
+
367
@main.command("doctor")
def doctor_cmd() -> None:
    """Validate engine, image, test-cases, output perms, and secrets."""
    click.echo("claw-bench diagnostics\n")
    checks = _doctor.run_all()
    for check in checks:
        _echo_result(check)
    click.echo()

    failing = sum(1 for c in checks if c.status == "fail")
    warning = sum(1 for c in checks if c.status == "warn")

    # Any failure exits non-zero; warnings alone do not.
    if failing:
        click.echo(click.style(f"{failing} failing check(s). Fix and re-run.", fg="red"))
        sys.exit(1)

    if warning:
        summary, tint = f"{warning} warning(s). ClawBench should still work.", "yellow"
    else:
        summary, tint = "All checks passed.", "green"
    click.echo(click.style(summary, fg=tint))
384
+
385
+
386
+ # ---------------------------------------------------------------------------
387
+ # version (explicit subcommand in addition to --version)
388
+ # ---------------------------------------------------------------------------
389
+
390
@main.command("version")
def version_cmd() -> None:
    """Print the installed version."""
    installed = __version__
    click.echo(installed)


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,127 @@
1
+ # ClawBench Chrome Extension
2
+
3
+ This is the source code for the ClawBench Chrome Extension, which acts as the client for the ClawBench benchmarking framework.
4
+
5
+ The extension is responsible for the following tasks:
6
+
7
+ - Collecting every action performed by the user/agent in the browser and sending the data to the ClawBench server.
8
+ - Taking screenshots of the browser on every action, with high-frequency events throttled.
9
+ - Sending full screen recording chunks to the server, which can later be stitched together into an .mp4 file.
10
+
11
+ The extension should auto start when any non-built-in page is loaded, and should stop when the browser is closed. No UI or configuration is needed for the extension, as all configuration is done on the server side.
12
+
13
+ A `setup.sh` script is provided to load the extension into Chrome. Linux and macOS are supported.
14
+
15
+ ## Files
16
+
17
+ | File | Description |
18
+ |------|-------------|
19
+ | `manifest.json` | Manifest V3 extension definition. Permissions: `activeTab`, `tabs`; host permissions: `<all_urls>`. Content scripts injected on all URLs. |
20
+ | `stealth.js` | Anti-bot-detection patches. Runs at `document_start` in `MAIN` world. Overrides `navigator.webdriver`, plugins, WebGL, permissions, etc. |
21
+ | `content.js` | Injected into every non-chrome:// page. Listens for DOM events, extracts metadata, sends to background. Runs at `document_idle` in `ISOLATED` world. |
22
+ | `background.js` | Service worker. Relays actions to server via HTTP POST. Captures screenshots with `chrome.tabs.captureVisibleTab`. |
23
+ | `setup.sh` | Detects Chrome/Chromium binary on macOS or Linux and launches with `--load-extension` and remote debugging enabled. |
24
+
25
+ ## Event Capture
26
+
27
+ ### Captured Events
28
+
29
+ `click`, `keydown`, `keyup`, `input`, `scroll`, `change`, `submit`, plus a synthetic `pageLoad` on each navigation.
30
+
31
+ ### Throttling
32
+
33
+ High-frequency events (`scroll`, `input`) are throttled to one every 500ms. Screenshots are also throttled to one every 500ms.
34
+
35
+ ### Action Payload
36
+
37
+ Each action sent to the server contains:
38
+
39
+ ```json
40
+ {
41
+ "type": "click",
42
+ "timestamp": 1710000001234,
43
+ "url": "https://example.com/",
44
+ "target": {
45
+ "tagName": "BUTTON",
46
+ "id": "submit-btn",
47
+ "className": "btn primary",
48
+ "textContent": "Submit",
49
+ "xpath": "/html[1]/body[1]/form[1]/button[1]"
50
+ },
51
+ "x": 255,
52
+ "y": 245
53
+ }
54
+ ```
55
+
56
+ Additional fields by event type:
57
+ - **click**: `x`, `y` (coordinates)
58
+ - **keydown/keyup**: `key` (key name)
59
+ - **input/change**: `value` (truncated to 200 chars)
60
+ - **scroll**: `scrollX`, `scrollY`
61
+ - **pageLoad**: `title`
62
+
63
+ ## Anti-Bot-Detection (Stealth)
64
+
65
+ The extension includes `stealth.js`, a content script injected at `document_start` in the `MAIN` world — meaning it runs before any page JavaScript and patches the page's actual `window`/`navigator` objects (not the extension's isolated world). This reduces the chance of being blocked by reCAPTCHA, Cloudflare Turnstile, and similar bot-detection systems.
66
+
67
+ The stealth measures are split across four layers:
68
+
69
+ ### Layer 1: Chrome Launch Flags (`entrypoint.sh`)
70
+
71
+ | Flag | What it does |
72
+ |------|-------------|
73
+ | Removed `--enable-automation` | Was explicitly telling Chrome to set `navigator.webdriver = true` and show the "controlled by automated software" infobar. Removing it eliminates both signals. |
74
+ | Removed `--disable-gpu` | Was disabling all GPU/WebGL rendering. Sites that fingerprint WebGL would see no renderer — a strong headless signal. |
75
+ | `--disable-blink-features=AutomationControlled` | Tells Blink not to set `navigator.webdriver = true`, even if CDP is attached. Belt-and-suspenders with the flag removal. |
76
+ | `--use-gl=angle --use-angle=swiftshader` | Enables software-rendered WebGL via SwiftShader through the ANGLE backend. This makes WebGL available with realistic renderer strings without a real GPU. Trade-off: higher CPU usage since all GL operations run in software. |
77
+ | `--enable-webgl` | Explicitly ensures WebGL contexts can be created. |
78
+ | `--remote-debugging-address=127.0.0.1` | CDP was previously bound to `0.0.0.0` (all interfaces). Now only accessible internally. External access still works through the `socat` forwarder on port 9223. Prevents page JavaScript from detecting CDP by probing network ports. |
79
+
80
+ ### Layer 2: Chrome Profile (`entrypoint.sh`)
81
+
82
+ An empty Chrome profile with no bookmarks, no history, and no preferences is a strong signal of a freshly-created automated browser. The entrypoint now pre-populates:
83
+
84
+ - **Preferences**: `accept_languages`, `safebrowsing`, `dns_prefetching`, `window_placement`, `skip_first_run_ui`, etc.
85
+ - **Bookmarks**: Three common entries (Google, YouTube, Wikipedia).
86
+ - **Local State**: Profile metadata with a named profile ("Person 1").
87
+
88
+ ### Layer 3: JavaScript Patches (`stealth.js`)
89
+
90
+ | # | Patch | Why |
91
+ |---|-------|-----|
92
+ | 1 | `navigator.webdriver → false` | The #1 bot detection signal. Real Chrome returns `false`; automated Chrome returns `true`. Even with the Blink flag, CDP attachment can re-enable it. |
93
+ | 2 | `navigator.languages → ['en-US', 'en']` | Ensures consistent locale regardless of container environment. |
94
+ | 3 | `navigator.plugins` — fake Chrome PDF Plugin, Chrome PDF Viewer, Native Client | Headless/automated Chrome reports an empty `PluginArray` (length 0). Real Chrome always has PDF and NaCl plugins. |
95
+ | 4 | `navigator.mimeTypes` — fake `application/pdf` entries | Must match the fake plugins. Empty mimeTypes = headless signal. |
96
+ | 5 | WebGL `getParameter()` — return SwiftShader vendor/renderer | Even with SwiftShader actually running, this ensures consistent, known-good strings across Chromium versions. Intercepts `UNMASKED_VENDOR_WEBGL` (0x9245) and `UNMASKED_RENDERER_WEBGL` (0x9246). |
97
+ | 6 | `Permissions.query({name:'notifications'})` → `'prompt'`, `Notification.permission` → `'default'` | Automated browsers deny all permissions by default. Real browsers return `'prompt'`/`'default'` for notifications. |
98
+ | 7 | `window.chrome.runtime` — ensure object exists | Some bot detectors check `if (!window.chrome \|\| !window.chrome.runtime)` to distinguish headless Chrome from real Chrome. |
99
+ | 8 | Remove `$cdc_`/`cdc_` properties on `document` | Chromedriver injects these properties. Not used by CDP directly, but removed as a precaution. |
100
+ | 9 | `navigator.hardwareConcurrency` → 8 (if < 4) | Docker containers with limited CPUs may report 1-2, which is suspicious for a desktop browser. |
101
+ | 10 | `navigator.deviceMemory` → 8 (if < 4) | Same — low memory is suspicious for desktop. |
102
+ | 11 | Iframe `navigator.webdriver` patching | Advanced fingerprinters create iframes and check `navigator.webdriver` inside them to bypass page-level overrides. We hook `document.createElement('iframe')` and patch the iframe's navigator on load. |
103
+
104
+ ### Layer 4: Dockerfile
105
+
106
+ `libegl1` and `libgbm1` are installed to provide the EGL and GBM libraries that Chrome's ANGLE/SwiftShader backend needs. Without them, `--use-gl=angle` silently falls back to no-GPU mode.
107
+
108
+ ### Test Results
109
+
110
+ Verified against bot-detection sites (2026-03-28):
111
+
112
+ | Test | Result |
113
+ |------|--------|
114
+ | bot.sannysoft.com | 10/11 main tests pass (only "WebDriver New" orange — CDP attachment quirk; "WebDriver Advanced" passes) |
115
+ | intoli headless detection | "You are not Chrome headless" |
116
+ | Cloudflare (nowsecure.nl) | Soft Turnstile challenge (not hard-blocked) |
117
+ | CreepJS | Fingerprint generated without bot flag |
118
+
119
+ ## Local Development
120
+
121
+ Run Chrome with the extension loaded:
122
+
123
+ ```bash
124
+ ./setup.sh https://example.com
125
+ ```
126
+
127
+ The server must be running on `http://localhost:7878` for the extension to send data.
@@ -0,0 +1,50 @@
1
// Base URL of the local ClawBench server that receives actions and screenshots.
const SERVER = "http://localhost:7878";
// Minimum interval, in milliseconds, between two screenshot captures.
const SCREENSHOT_THROTTLE_MS = 500;

// Date.now() timestamp of the most recent screenshot attempt (0 = none yet).
let lastScreenshot = 0;
5
+
6
// Auto-focus newly created tabs so the agent's working tab is always visible
chrome.tabs.onCreated.addListener((tab) => {
  if (!tab.id) return;
  // A tab can be closed before the activation lands; log the rejection
  // instead of leaving it as an unhandled promise rejection in the worker.
  chrome.tabs.update(tab.id, { active: true }).catch((e) => {
    console.error("[clawbench] auto-focus failed:", e);
  });
});
12
+
13
// Receive events from content script
chrome.runtime.onMessage.addListener((msg, sender) => {
  if (msg.type !== "action") return;

  // Bring the tab where the action occurred to front so the screen recording
  // and captureVisibleTab always show the tab the agent is working on.
  // Activation is asynchronous, so keep the promise and take the screenshot
  // only once it has settled; otherwise the capture can show the previously
  // active tab. A failed activation is logged and the capture still runs.
  const tabId = sender.tab && sender.tab.id;
  const activation = tabId
    ? chrome.tabs.update(tabId, { active: true }).catch((e) => {
        console.error("[clawbench] tab activation failed:", e);
      })
    : Promise.resolve();

  // Posting the action does not depend on which tab is visible.
  postAction(msg.data);
  activation.then(() => captureScreenshot());
});
25
+
26
// POST one recorded action to the server as JSON.
//   data: the action payload forwarded from the content script.
// Never throws: network failures and non-2xx responses are logged.
async function postAction(data) {
  try {
    const res = await fetch(`${SERVER}/api/action`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(data),
    });
    // fetch() only rejects on network errors; surface HTTP errors as well so
    // a server-side rejection of the action is not silently dropped.
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
  } catch (e) { console.error("[clawbench] postAction failed:", e); }
}
35
+
36
// Capture the visible tab as a PNG and POST it to the server, at most once
// every SCREENSHOT_THROTTLE_MS milliseconds.
// Never throws: capture failures, network failures and non-2xx responses are
// logged.
async function captureScreenshot() {
  const now = Date.now();
  if (now - lastScreenshot < SCREENSHOT_THROTTLE_MS) return;
  // Record the attempt before awaiting so that concurrent calls are throttled
  // too, not just calls made after this capture completes.
  lastScreenshot = now;

  try {
    const dataUrl = await chrome.tabs.captureVisibleTab(null, { format: "png" });
    // Strip the data-URL header; the server receives only the base64 body.
    const base64 = dataUrl.replace(/^data:image\/png;base64,/, "");
    const res = await fetch(`${SERVER}/api/screenshot`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ timestamp: now, data: base64 }),
    });
    // fetch() only rejects on network errors; surface HTTP errors as well.
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
  } catch (e) { console.error("[clawbench] captureScreenshot failed:", e); }
}
@@ -0,0 +1,70 @@
1
// Minimum interval, in milliseconds, between two forwarded events of the same
// throttled type.
const THROTTLE_MS = 500;
// Maps a throttled event type to the Date.now() timestamp of the last event of
// that type that was forwarded.
const lastSent = {};
3
+
4
// Build a positional XPath such as "/html[1]/body[1]/div[2]" for an element.
// Returns "" when `el` is missing or is not an element node (nodeType 1).
function getXPath(el) {
  if (!el || el.nodeType !== 1) return "";

  const segments = [];
  // Walk from the element up through its ancestors, collecting one segment
  // per level; the segments are reversed at the end to read root-first.
  for (let node = el; node && node.nodeType === 1; node = node.parentElement) {
    // 1-based position among preceding siblings that share this tag name.
    let position = 1;
    let prev = node.previousElementSibling;
    while (prev) {
      if (prev.tagName === node.tagName) position += 1;
      prev = prev.previousElementSibling;
    }
    segments.push(node.tagName.toLowerCase() + "[" + position + "]");
  }
  return "/" + segments.reverse().join("/");
}
17
+
18
// Build the action record for one DOM event.
//   type: event name, e.g. "click" or "scroll".
//   e:    the DOM event; its target may be a non-element such as `document`.
// Returns a plain object with type, timestamp, url and a `target` summary,
// plus x/y, key, value and scrollX/scrollY when they apply.
function buildPayload(type, e) {
  const target = e.target || {};

  // On SVG elements `className` is an SVGAnimatedString object, not a string;
  // read its `baseVal` so the payload always carries a plain string.
  const rawClass = target.className;
  let className = "";
  if (typeof rawClass === "string") {
    className = rawClass;
  } else if (rawClass && typeof rawClass.baseVal === "string") {
    className = rawClass.baseVal;
  }

  const payload = {
    type,
    timestamp: Date.now(),
    url: location.href,
    target: {
      tagName: target.tagName || "",
      id: target.id || "",
      className,
      // Cap the text so large containers do not bloat the message.
      textContent: (target.textContent || "").slice(0, 100),
      xpath: getXPath(target),
    },
  };
  // Pointer coordinates exist only on mouse-style events.
  if (e.clientX !== undefined) {
    payload.x = e.clientX;
    payload.y = e.clientY;
  }
  if (e.key) payload.key = e.key;
  // Form-control value, stringified and capped at 200 characters.
  if (target.value !== undefined) payload.value = String(target.value).slice(0, 200);
  if (type === "scroll") {
    payload.scrollX = window.scrollX;
    payload.scrollY = window.scrollY;
  }
  return payload;
}
44
+
45
// Report whether events of this type are rate-limited before being forwarded.
// Only "scroll" and "input" are.
function throttled(type) {
  switch (type) {
    case "scroll":
    case "input":
      return true;
    default:
      return false;
  }
}
48
+
49
// Forward one DOM event to the background worker as an "action" message.
// Events of a throttled type are dropped when the previous forwarded event of
// that type was sent less than THROTTLE_MS milliseconds ago.
function send(type, e) {
  if (throttled(type)) {
    const ts = Date.now();
    const previous = lastSent[type];
    if (previous && ts - previous < THROTTLE_MS) {
      return;
    }
    lastSent[type] = ts;
  }

  const data = buildPayload(type, e);
  chrome.runtime.sendMessage({ type: "action", data });
}
57
+
58
// Listen for each recorded event type on the document in the capture phase
// and forward it through send().
const RECORDED_EVENTS = ["click", "keydown", "keyup", "input", "scroll", "change", "submit"];
for (const evt of RECORDED_EVENTS) {
  document.addEventListener(evt, (e) => send(evt, e), true);
}

// Report a synthetic "pageLoad" action as soon as this script runs.
const pageLoadAction = {
  type: "pageLoad",
  timestamp: Date.now(),
  url: location.href,
  title: document.title,
};
chrome.runtime.sendMessage({ type: "action", data: pageLoadAction });
@@ -0,0 +1,25 @@
1
+ {
2
+ "manifest_version": 3,
3
+ "name": "ClawBench",
4
+ "version": "1.0",
5
+ "description": "Browser action recording for benchmarking",
6
+ "permissions": ["activeTab", "tabs"],
7
+ "host_permissions": ["<all_urls>"],
8
+ "background": {
9
+ "service_worker": "background.js"
10
+ },
11
+ "content_scripts": [
12
+ {
13
+ "matches": ["<all_urls>"],
14
+ "js": ["stealth.js"],
15
+ "run_at": "document_start",
16
+ "world": "MAIN"
17
+ },
18
+ {
19
+ "matches": ["<all_urls>"],
20
+ "js": ["content.js"],
21
+ "run_at": "document_idle",
22
+ "all_frames": true
23
+ }
24
+ ]
25
+ }