clawbench-cli 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. clawbench/__init__.py +35 -0
  2. clawbench/__main__.py +8 -0
  3. clawbench/batch.py +619 -0
  4. clawbench/cli.py +397 -0
  5. clawbench/data/chrome-extension/README.md +127 -0
  6. clawbench/data/chrome-extension/background.js +50 -0
  7. clawbench/data/chrome-extension/content.js +70 -0
  8. clawbench/data/chrome-extension/manifest.json +25 -0
  9. clawbench/data/chrome-extension/setup.sh +27 -0
  10. clawbench/data/chrome-extension/stealth.js +200 -0
  11. clawbench/data/docker/Dockerfile +51 -0
  12. clawbench/data/docker/entrypoint.sh +394 -0
  13. clawbench/data/docker/setup-openclaw.sh +112 -0
  14. clawbench/data/eval/README.md +95 -0
  15. clawbench/data/eval/agentic_eval.md +53 -0
  16. clawbench/data/extension-server/.python-version +1 -0
  17. clawbench/data/extension-server/README.md +54 -0
  18. clawbench/data/extension-server/pyproject.toml +7 -0
  19. clawbench/data/extension-server/server.py +360 -0
  20. clawbench/data/extension-server/uv.lock +644 -0
  21. clawbench/data/models/model.schema.json +44 -0
  22. clawbench/data/models/models.example.yaml +16 -0
  23. clawbench/data/shared/alex_green_personal_info.json +451 -0
  24. clawbench/data/test-cases/001-daily-life-food-uber-eats/task.json +25 -0
  25. clawbench/data/test-cases/002-daily-life-food-doordash/task.json +25 -0
  26. clawbench/data/test-cases/004-daily-life-food-instacart/extra_info/grocery_list.json +36 -0
  27. clawbench/data/test-cases/004-daily-life-food-instacart/task.json +30 -0
  28. clawbench/data/test-cases/006-daily-life-food-uber-eats/task.json +24 -0
  29. clawbench/data/test-cases/007-daily-life-food-instacart/extra_info/meal_plan.json +21 -0
  30. clawbench/data/test-cases/007-daily-life-food-instacart/task.json +30 -0
  31. clawbench/data/test-cases/011-daily-life-housing-zillow/task.json +25 -0
  32. clawbench/data/test-cases/015-daily-life-housing-craigslist/extra_info/listing_details.json +26 -0
  33. clawbench/data/test-cases/015-daily-life-housing-craigslist/task.json +30 -0
  34. clawbench/data/test-cases/035-daily-life-health-medical-betterhelp/task.json +25 -0
  35. clawbench/data/test-cases/041-daily-life-pets-rover/task.json +25 -0
  36. clawbench/data/test-cases/043-daily-life-pets-rover/extra_info/pet_info.json +12 -0
  37. clawbench/data/test-cases/043-daily-life-pets-rover/task.json +30 -0
  38. clawbench/data/test-cases/045-daily-life-personal-care-booksy/task.json +25 -0
  39. clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/extra_info/address_info.json +7 -0
  40. clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/task.json +30 -0
  41. clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/extra_info/job_links.json +5 -0
  42. clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/task.json +30 -0
  43. clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/extra_info/job_links.json +5 -0
  44. clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/task.json +30 -0
  45. clawbench/data/test-cases/091-job-search-hr-job-apply-indeed/task.json +25 -0
  46. clawbench/data/test-cases/120-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  47. clawbench/data/test-cases/121-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  48. clawbench/data/test-cases/128-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
  49. clawbench/data/test-cases/134-office-secretary-tasks-calendar-calendly/task.json +25 -0
  50. clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/extra_info/meeting_details.json +30 -0
  51. clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/task.json +30 -0
  52. clawbench/data/test-cases/139-office-secretary-tasks-calendar-calendly/task.json +25 -0
  53. clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/extra_info/task_list.json +29 -0
  54. clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/task.json +30 -0
  55. clawbench/data/test-cases/179-dev-tech-github-ops-github/extra_info/config.json +13 -0
  56. clawbench/data/test-cases/179-dev-tech-github-ops-github/task.json +30 -0
  57. clawbench/data/test-cases/180-dev-tech-github-ops-github/task.json +25 -0
  58. clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/extra_info/raw_results.json +47 -0
  59. clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/task.json +30 -0
  60. clawbench/data/test-cases/242-academia-research-research-tools-overleaf/task.json +25 -0
  61. clawbench/data/test-cases/246-academia-research-research-tools-zotero/task.json +25 -0
  62. clawbench/data/test-cases/247-academia-research-research-tools-semantic-scholar/task.json +25 -0
  63. clawbench/data/test-cases/265-education-learning-general-coursera/task.json +25 -0
  64. clawbench/data/test-cases/266-education-learning-general-leetcode/extra_info/solution_code.py +9 -0
  65. clawbench/data/test-cases/266-education-learning-general-leetcode/task.json +30 -0
  66. clawbench/data/test-cases/273-education-learning-general-edx/task.json +25 -0
  67. clawbench/data/test-cases/274-education-learning-general-udemy/task.json +25 -0
  68. clawbench/data/test-cases/279-travel-general-airbnb/task.json +25 -0
  69. clawbench/data/test-cases/280-travel-general-booking-com/task.json +25 -0
  70. clawbench/data/test-cases/363-entertainment-hobbies-general-ticketmaster/task.json +25 -0
  71. clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/extra_info/book_list.json +14 -0
  72. clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/task.json +30 -0
  73. clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/extra_info/event_details.json +10 -0
  74. clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/task.json +30 -0
  75. clawbench/data/test-cases/403-personal-management-account-security-1password-web/extra_info/credentials.json +34 -0
  76. clawbench/data/test-cases/403-personal-management-account-security-1password-web/task.json +30 -0
  77. clawbench/data/test-cases/413-personal-management-personal-tools-todoist/extra_info/task_list.json +52 -0
  78. clawbench/data/test-cases/413-personal-management-personal-tools-todoist/task.json +30 -0
  79. clawbench/data/test-cases/468-rating-voting-general-glassdoor/extra_info/interview_experience.json +10 -0
  80. clawbench/data/test-cases/468-rating-voting-general-glassdoor/task.json +30 -0
  81. clawbench/data/test-cases/469-rating-voting-general-tripadvisor/extra_info/review_content.json +6 -0
  82. clawbench/data/test-cases/469-rating-voting-general-tripadvisor/task.json +30 -0
  83. clawbench/data/test-cases/470-rating-voting-general-trustpilot/extra_info/review_content.json +6 -0
  84. clawbench/data/test-cases/470-rating-voting-general-trustpilot/task.json +30 -0
  85. clawbench/data/test-cases/474-rating-voting-general-capterra/task.json +25 -0
  86. clawbench/data/test-cases/475-rating-voting-general-g2/task.json +25 -0
  87. clawbench/data/test-cases/482-creation-init-general-confluence/extra_info/content.json +3 -0
  88. clawbench/data/test-cases/482-creation-init-general-confluence/task.json +30 -0
  89. clawbench/data/test-cases/483-creation-init-general-airtable/task.json +25 -0
  90. clawbench/data/test-cases/484-creation-init-general-clickup/task.json +28 -0
  91. clawbench/data/test-cases/485-creation-init-general-webflow/task.json +25 -0
  92. clawbench/data/test-cases/486-creation-init-general-mailchimp/extra_info/content.json +3 -0
  93. clawbench/data/test-cases/486-creation-init-general-mailchimp/task.json +30 -0
  94. clawbench/data/test-cases/487-creation-init-general-typeform/extra_info/survey_questions.json +85 -0
  95. clawbench/data/test-cases/487-creation-init-general-typeform/task.json +30 -0
  96. clawbench/data/test-cases/488-creation-init-general-substack/extra_info/content.json +3 -0
  97. clawbench/data/test-cases/488-creation-init-general-substack/task.json +30 -0
  98. clawbench/data/test-cases/489-creation-init-general-ghost/extra_info/content.json +3 -0
  99. clawbench/data/test-cases/489-creation-init-general-ghost/task.json +30 -0
  100. clawbench/data/test-cases/501-creation-init-general-asana/extra_info/project_description.json +8 -0
  101. clawbench/data/test-cases/501-creation-init-general-asana/task.json +33 -0
  102. clawbench/data/test-cases/529-daily-life-shopping-delivery-king-arthur-baking/task.json +25 -0
  103. clawbench/data/test-cases/533-daily-life-utilities-inmyarea/task.json +25 -0
  104. clawbench/data/test-cases/535-daily-life-home-home-depot/task.json +25 -0
  105. clawbench/data/test-cases/537-daily-life-food-crumbl/task.json +25 -0
  106. clawbench/data/test-cases/539-daily-life-health-jefit/task.json +25 -0
  107. clawbench/data/test-cases/542-daily-life-pets-wag/task.json +25 -0
  108. clawbench/data/test-cases/551-finance-investment-crypto-wallet-trezor/task.json +25 -0
  109. clawbench/data/test-cases/552-finance-investment-business-payment-plooto/task.json +25 -0
  110. clawbench/data/test-cases/555-finance-investment-insurance-insureon/task.json +25 -0
  111. clawbench/data/test-cases/559-finance-investment-crowdfunding-frontfundr/task.json +25 -0
  112. clawbench/data/test-cases/564-daily-life-event-registration-race-roster/task.json +25 -0
  113. clawbench/data/test-cases/565-job-search-hr-job-search-jopwell/task.json +25 -0
  114. clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/extra_info/listing_details.json +26 -0
  115. clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/task.json +30 -0
  116. clawbench/data/test-cases/569-job-search-hr-job-search-careerbuilder/task.json +25 -0
  117. clawbench/data/test-cases/570-job-search-hr-job-search-hired/task.json +25 -0
  118. clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/extra_info/listing_details.json +26 -0
  119. clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/task.json +30 -0
  120. clawbench/data/test-cases/576-office-secretary-tasks-reports-ftc-reportfraud/task.json +25 -0
  121. clawbench/data/test-cases/583-office-secretary-tasks-support-tickets-freshdesk/task.json +25 -0
  122. clawbench/data/test-cases/598-academia-research-legal-docs-formswift/task.json +25 -0
  123. clawbench/data/test-cases/606-education-learning-kids-courses-outschool/task.json +25 -0
  124. clawbench/data/test-cases/607-education-learning-art-courses-creativebug/task.json +25 -0
  125. clawbench/data/test-cases/609-education-learning-meditation-spirit-rock-meditation-center/task.json +25 -0
  126. clawbench/data/test-cases/615-travel-flights-spirit-airlines/task.json +25 -0
  127. clawbench/data/test-cases/618-travel-train-bus-12go-asia/task.json +25 -0
  128. clawbench/data/test-cases/625-travel-camping-outdoor-parks-canada-reservations/task.json +25 -0
  129. clawbench/data/test-cases/626-travel-bus-flixbus/task.json +25 -0
  130. clawbench/data/test-cases/627-travel-flights-momondo/task.json +25 -0
  131. clawbench/data/test-cases/632-shopping-commerce-beauty-care-olaplex/task.json +25 -0
  132. clawbench/data/test-cases/634-shopping-commerce-apparel-dooney-bourke/task.json +25 -0
  133. clawbench/data/test-cases/635-shopping-commerce-gifts-uncommon-goods/task.json +25 -0
  134. clawbench/data/test-cases/636-shopping-commerce-auto-parts-rockauto/task.json +25 -0
  135. clawbench/data/test-cases/638-shopping-commerce-print-custom-vistaprint/task.json +25 -0
  136. clawbench/data/test-cases/639-shopping-commerce-luxury-mansur-gavriel/task.json +25 -0
  137. clawbench/data/test-cases/671-entertainment-gaming-humble-bundle/task.json +25 -0
  138. clawbench/data/test-cases/672-entertainment-hobbies-anime-streaming-crunchyroll/task.json +25 -0
  139. clawbench/data/test-cases/674-entertainment-hobbies-masterclass-masterclass/task.json +25 -0
  140. clawbench/data/test-cases/676-government-civic-legal-docs-legalnature/task.json +25 -0
  141. clawbench/data/test-cases/685-personal-management-budget-mgmt-everydollar/task.json +25 -0
  142. clawbench/data/test-cases/687-personal-management-vpn-subscription-ipvanish/task.json +25 -0
  143. clawbench/data/test-cases/688-personal-management-insurance-compare-insurify/task.json +25 -0
  144. clawbench/data/test-cases/695-automation-workflows-recurring-order-stumptown-coffee/task.json +25 -0
  145. clawbench/data/test-cases/697-automation-workflows-recurring-order-bean-box/task.json +25 -0
  146. clawbench/data/test-cases/699-automation-workflows-recurring-order-mistobox/task.json +25 -0
  147. clawbench/data/test-cases/700-deletion-revocation-data-deletion-deleteme/task.json +25 -0
  148. clawbench/data/test-cases/705-rating-voting-wine-review-vivino/task.json +25 -0
  149. clawbench/data/test-cases/706-rating-voting-beer-review-beeradvocate/task.json +25 -0
  150. clawbench/data/test-cases/707-rating-voting-social-wine-untappd/task.json +25 -0
  151. clawbench/data/test-cases/708-rating-voting-professor-review-ratemyprofessors/task.json +28 -0
  152. clawbench/data/test-cases/709-rating-voting-service-review-angi/task.json +25 -0
  153. clawbench/data/test-cases/710-creation-init-interior-design-roomsketcher/task.json +25 -0
  154. clawbench/data/test-cases/711-creation-init-color-design-coolors/task.json +25 -0
  155. clawbench/data/test-cases/712-creation-init-website-create-squarespace/task.json +25 -0
  156. clawbench/data/test-cases/713-creation-init-website-build-wix/task.json +25 -0
  157. clawbench/data/test-cases/735-home-services-maintenance-house-cleaning-bark/task.json +25 -0
  158. clawbench/data/test-cases/736-home-services-maintenance-plumbing-ace-hardware/task.json +25 -0
  159. clawbench/data/test-cases/737-home-services-maintenance-kitchen-remodel-lowes/task.json +25 -0
  160. clawbench/data/test-cases/738-home-services-maintenance-equipment-install-amazon-home-services/task.json +25 -0
  161. clawbench/data/test-cases/750-automotive-vehicle-services-car-insurance-compare-kanetix/task.json +25 -0
  162. clawbench/data/test-cases/751-automotive-vehicle-services-car-lease-sixt/task.json +25 -0
  163. clawbench/data/test-cases/754-automotive-vehicle-services-used-car-listing-autotrader/task.json +25 -0
  164. clawbench/data/test-cases/763-automotive-vehicle-services-car-lease-autoslash/task.json +25 -0
  165. clawbench/data/test-cases/766-nonprofit-charity-donation-doctors-without-borders-msf/task.json +25 -0
  166. clawbench/data/test-cases/768-nonprofit-charity-community-crowdfund-ioby/task.json +25 -0
  167. clawbench/data/test-cases/770-nonprofit-charity-volunteer-apply-on-make-a-wish-foundation-website-complete-and-submit-a-volunteer-application-form-selecting-the-wish-granter-role-and-entering-city-phoenix-az/task.json +25 -0
  168. clawbench/data/test-cases/774-nonprofit-charity-nonprofit-job-apply-charity-village/task.json +25 -0
  169. clawbench/data/test-cases/776-nonprofit-charity-volunteer-signup-idealist/task.json +25 -0
  170. clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/extra_info/payment_info.json +3 -0
  171. clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/task.json +30 -0
  172. clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/extra_info/address_info.json +4 -0
  173. clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/task.json +30 -0
  174. clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/extra_info/email_info.json +3 -0
  175. clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/task.json +30 -0
  176. clawbench/data/test-cases/782-beauty-personal-care-skincare-purchase-paulas-choice/task.json +24 -0
  177. clawbench/data/test-cases/783-beauty-personal-care-beauty-booking-ulta-beauty/task.json +24 -0
  178. clawbench/data/test-cases/785-beauty-personal-care-skincare-curology/task.json +25 -0
  179. clawbench/data/test-cases/788-beauty-personal-care-makeup-the-ordinary/task.json +25 -0
  180. clawbench/data/test-cases/789-beauty-personal-care-makeup-fenty-beauty/task.json +25 -0
  181. clawbench/data/test-cases/793-beauty-personal-care-beauty-retail-mac-cosmetics/task.json +25 -0
  182. clawbench/data/test-cases/794-beauty-personal-care-salon-booking-styleseat/task.json +25 -0
  183. clawbench/data/test-cases/795-pet-animal-care-pet-adoption-aspca/task.json +25 -0
  184. clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/extra_info/pet_info.json +12 -0
  185. clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/task.json +30 -0
  186. clawbench/data/test-cases/799-pet-animal-care-pet-insurance-aspca-pet-health-insurance/task.json +25 -0
  187. clawbench/data/test-cases/801-pet-animal-care-pet-friendly-travel-bringfido/task.json +25 -0
  188. clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/extra_info/pet_info.json +12 -0
  189. clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/task.json +30 -0
  190. clawbench/data/test-cases/807-pet-animal-care-pet-dna-embark/task.json +25 -0
  191. clawbench/data/test-cases/809-pet-animal-care-pet-adopt-petfinder/task.json +28 -0
  192. clawbench/data/test-cases/812-pet-animal-care-pet-subscription-ollie/task.json +25 -0
  193. clawbench/data/test-cases/815-personal-management-records-mgmt-myheritage/task.json +25 -0
  194. clawbench/data/test-cases/821-education-learning-reading-self-study-blinkist/task.json +25 -0
  195. clawbench/data/test-cases/861-entertainment-hobbies-movies-cineplex/task.json +25 -0
  196. clawbench/data/test-cases/862-entertainment-hobbies-movies-amc-theatres/task.json +25 -0
  197. clawbench/data/test-cases/864-entertainment-hobbies-show-tickets-ticketmaster/task.json +25 -0
  198. clawbench/data/test-cases/865-travel-outdoor-hipcamp/task.json +25 -0
  199. clawbench/data/test-cases/867-entertainment-hobbies-movies-fandango/task.json +25 -0
  200. clawbench/data/test-cases/872-daily-life-food-opentable/task.json +25 -0
  201. clawbench/data/test-cases/873-daily-life-food-resy/task.json +28 -0
  202. clawbench/data/test-cases/876-entertainment-hobbies-show-tickets-vivid-seats/task.json +25 -0
  203. clawbench/data/test-cases/877-entertainment-hobbies-show-tickets-stubhub/task.json +25 -0
  204. clawbench/data/test-cases/878-travel-outdoor-ontario-parks/task.json +25 -0
  205. clawbench/data/test-cases/883-education-learning-hobby-class-sur-la-table/task.json +25 -0
  206. clawbench/data/test-cases/884-entertainment-hobbies-experience-breakout-games/task.json +25 -0
  207. clawbench/data/test-cases/885-entertainment-hobbies-experience-bowlero/task.json +25 -0
  208. clawbench/data/test-cases/886-entertainment-hobbies-experience-topgolf/task.json +25 -0
  209. clawbench/data/test-cases/lite.json +226 -0
  210. clawbench/data/test-cases/lite.schema.json +105 -0
  211. clawbench/data/test-cases/task.schema.json +132 -0
  212. clawbench/data/tools/build_clawbench_lite_enc.py +161 -0
  213. clawbench/doctor.py +171 -0
  214. clawbench/engine.py +180 -0
  215. clawbench/generate_resume_pdf.py +140 -0
  216. clawbench/hf_upload.py +78 -0
  217. clawbench/image.py +127 -0
  218. clawbench/paths.py +150 -0
  219. clawbench/resume_template.json +104 -0
  220. clawbench/run.py +942 -0
  221. clawbench/tui.py +1401 -0
  222. clawbench_cli-0.1.2.dist-info/METADATA +770 -0
  223. clawbench_cli-0.1.2.dist-info/RECORD +226 -0
  224. clawbench_cli-0.1.2.dist-info/WHEEL +4 -0
  225. clawbench_cli-0.1.2.dist-info/entry_points.txt +4 -0
  226. clawbench_cli-0.1.2.dist-info/licenses/LICENSE +201 -0
clawbench/hf_upload.py ADDED
@@ -0,0 +1,78 @@
1
+ """Optional HuggingFace dataset upload for ClawBench runs."""
2
+
3
+ import json
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+
7
+
8
def hf_upload_enabled(env: dict[str, str]) -> bool:
    """Report whether HuggingFace upload is configured.

    Returns True only when both ``HF_TOKEN`` and ``HF_REPO_ID`` are present
    in ``env`` with non-empty values.
    """
    required_keys = ("HF_TOKEN", "HF_REPO_ID")
    return all(env.get(key) for key in required_keys)
11
+
12
+
13
def upload_run(output_dir: Path, repo_path_prefix: str, env: dict[str, str]) -> None:
    """Upload a run's output directory to HuggingFace and record a local marker.

    The upload is best-effort: every failure is reported as a warning on
    stdout and the function returns normally. After a successful upload an
    ``uploaded.json`` marker (repo id, path in repo, commit URL, UTC upload
    time) is written into ``output_dir``. Local files, including ``data/``,
    are left in place; nothing is deleted here.

    Args:
        output_dir: Local output path (contains run-meta.json, data/).
        repo_path_prefix: Path inside the HF repo, e.g. "model/case-model-ts".
        env: Dict with HF_TOKEN and HF_REPO_ID. If either is missing or
            empty the upload is skipped with a warning.
    """
    try:
        from huggingface_hub import HfApi
    except ImportError:
        print(" WARNING: huggingface_hub not installed, skipping upload")
        return

    # Use .get() so an unconfigured environment degrades to a warning instead
    # of raising KeyError out of an otherwise best-effort helper.
    token = env.get("HF_TOKEN")
    repo_id = env.get("HF_REPO_ID")
    if not token or not repo_id:
        print(" WARNING: HF_TOKEN / HF_REPO_ID not set, skipping upload")
        return

    api = HfApi(token=token)

    try:
        commit_info = api.upload_folder(
            folder_path=str(output_dir),
            repo_id=repo_id,
            repo_type="dataset",
            path_in_repo=repo_path_prefix,
            ignore_patterns=[".my-info-tmp/**"],
            commit_message=f"Add run: {repo_path_prefix}",
        )
        commit_url = getattr(commit_info, "commit_url", None) or ""
        print(f" Uploaded to HF: {repo_id}/{repo_path_prefix}")

        # Record where this run was uploaded with a lightweight marker file.
        marker = {
            "repo_id": repo_id,
            "path_in_repo": repo_path_prefix,
            "commit_url": commit_url,
            "uploaded_at": datetime.now(timezone.utc).isoformat(),
        }
        (output_dir / "uploaded.json").write_text(
            json.dumps(marker, indent=2), encoding="utf-8"
        )

    except Exception as e:
        # Deliberately broad: an upload problem must never abort the run.
        print(f" WARNING: HuggingFace upload failed: {e}")
54
+
55
+
56
def upload_file(local_path: Path, path_in_repo: str, env: dict[str, str]) -> None:
    """Upload a single file to HuggingFace (e.g. batch-summary.json).

    Best-effort: a missing ``huggingface_hub`` package, missing credentials,
    or any upload error is reported as a warning on stdout and the function
    returns normally.

    Args:
        local_path: File on disk to upload.
        path_in_repo: Destination path inside the HF dataset repo.
        env: Dict with HF_TOKEN and HF_REPO_ID. If either is missing or
            empty the upload is skipped with a warning.
    """
    try:
        from huggingface_hub import HfApi
    except ImportError:
        print(" WARNING: huggingface_hub not installed, skipping upload")
        return

    # Use .get() so an unconfigured environment degrades to a warning instead
    # of raising KeyError out of an otherwise best-effort helper.
    token = env.get("HF_TOKEN")
    repo_id = env.get("HF_REPO_ID")
    if not token or not repo_id:
        print(" WARNING: HF_TOKEN / HF_REPO_ID not set, skipping upload")
        return

    api = HfApi(token=token)

    try:
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"Add {path_in_repo}",
        )
        print(f" Uploaded to HF: {repo_id}/{path_in_repo}")
    except Exception as e:
        # Deliberately broad: an upload problem must never abort the caller.
        print(f" WARNING: HuggingFace upload failed: {e}")
clawbench/image.py ADDED
@@ -0,0 +1,127 @@
1
+ """Container image lifecycle helpers.
2
+
3
+ The single responsibility here is answering the question: *is the
4
+ ``clawbench`` image available locally and is it the right version?* —
5
+ pulling from the registry when it isn't, and falling back to a local
6
+ build when pull fails (offline, rate-limited, arch mismatch).
7
+
8
+ Why pull-first, build-fallback:
9
+
10
+ - A first-time ``docker build`` takes 5-10 minutes on a fresh system.
11
+ For users who just typed ``pip install clawbench-cli``, that is an awful
12
+ first impression. A prebuilt image on GHCR is an order of magnitude
13
+ faster and already exists on the release pipeline.
14
+ - But pulls can fail in ways builds cannot (behind an enterprise proxy,
15
+ no GHCR auth, unsupported arch). Silently falling back to build keeps
16
+ the package usable in those environments instead of hard-erroring.
17
+
18
+ Version-label check:
19
+
20
+ - The release CI tags images with ``LABEL org.clawbench.version=<v>``
21
+ matching the pypi version. We warn loudly (but keep going) if the
22
+ local image's label diverges from ``clawbench.__version__`` — the
23
+ single most common post-release footgun is "works locally because I
24
+ have a stale hand-built image that nobody else has."
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import subprocess
30
+
31
+ from clawbench import __version__
32
+ from clawbench.engine import detect_engine
33
+
34
+ IMAGE_NAME = "clawbench"
35
+ REGISTRY_REF = "ghcr.io/reacher-z/claw-bench"
36
+ VERSION_LABEL = "org.clawbench.version"
37
+
38
+
39
def _engine_or_fail() -> str:
    """Return the engine reported by ``detect_engine()``.

    Raises:
        RuntimeError: if ``detect_engine()`` returns ``None``.
    """
    detected = detect_engine()
    if detected is not None:
        return detected
    raise RuntimeError(
        "No container engine (podman or docker) found on PATH. "
        "Install podman: https://podman.io/docs/installation"
    )
47
+
48
+
49
def image_exists(engine: str | None = None, ref: str = IMAGE_NAME) -> bool:
    """Tell whether ``ref`` can be inspected in the local image store.

    Runs ``<engine> image inspect <ref>`` with its output captured and
    reports whether the command exited with status 0. When ``engine`` is
    not supplied it is resolved through ``_engine_or_fail()``.
    """
    runner = engine if engine else _engine_or_fail()
    inspect_cmd = [runner, "image", "inspect", ref]
    completed = subprocess.run(inspect_cmd, capture_output=True)
    return completed.returncode == 0
56
+
57
+
58
def image_label(engine: str | None = None, ref: str = IMAGE_NAME) -> str | None:
    """Read the ``org.clawbench.version`` label of a local image.

    Returns the stripped label text, or ``None`` when the inspect command
    exits non-zero or prints nothing for the label.
    """
    runner = engine if engine else _engine_or_fail()
    # Go-template expression handed to ``image inspect --format``.
    template = '{{ index .Config.Labels "%s" }}' % VERSION_LABEL
    completed = subprocess.run(
        [runner, "image", "inspect", "--format", template, ref],
        capture_output=True,
        text=True,
    )
    if completed.returncode != 0:
        return None
    value = completed.stdout.strip()
    return value if value else None
71
+
72
+
73
def pull_image(
    engine: str | None = None,
    tag: str | None = None,
) -> tuple[bool, str]:
    """Attempt to pull ``ghcr.io/reacher-z/claw-bench:<tag>`` and retag it
    locally as ``clawbench`` so the rest of the code keeps working.

    Returns ``(success, detail)``. On failure ``detail`` is the stderr of
    the command that failed (pull or tag), falling back to its stdout when
    stderr is empty; on success it is the empty string.

    ``tag`` defaults to the installed package version; callers that want
    ``:latest`` explicitly can pass it.
    """
    eng = engine or _engine_or_fail()
    use_tag = tag or __version__
    ref = f"{REGISTRY_REF}:{use_tag}"

    pull_r = subprocess.run(
        [eng, "pull", ref],
        capture_output=True,
        text=True,
    )
    if pull_r.returncode != 0:
        return False, pull_r.stderr.strip() or pull_r.stdout.strip()

    # Retag so the existing run.py / tui.py code paths that say
    # ``clawbench`` (un-prefixed) keep working.
    tag_r = subprocess.run(
        [eng, "tag", ref, IMAGE_NAME],
        capture_output=True,
        text=True,
    )
    if tag_r.returncode != 0:
        # Same stderr-then-stdout fallback as the pull step, so a failure
        # never yields an empty diagnostic when the engine wrote to stdout.
        return False, tag_r.stderr.strip() or tag_r.stdout.strip()
    return True, ""
104
+
105
+
106
def verify_image_version(engine: str | None = None) -> tuple[bool, str]:
    """Compare the local image's version label with the package version.

    Returns ``(matches, detail)``:

    - ``(False, msg)`` when the image is not present locally.
    - ``(True, "")`` when no label is reported (unlabeled legacy images are
      accepted so existing users are not warned needlessly) or when the
      label equals ``__version__``.
    - ``(False, msg)`` when the label differs; ``msg`` is user-facing.
    """
    runner = engine if engine else _engine_or_fail()
    if not image_exists(runner):
        return False, f"image '{IMAGE_NAME}' not present locally"

    found = image_label(runner)
    # ``None`` (no label reported) is treated as a legacy image and accepted.
    if found is None or found == __version__:
        return True, ""

    mismatch = (
        f"image version label '{found}' != package version '{__version__}'. "
        f"Consider `claw-bench build --no-cache` to rebuild."
    )
    return False, mismatch
clawbench/paths.py ADDED
@@ -0,0 +1,150 @@
1
+ """Path helpers for the installed package.
2
+
3
+ Three kinds of locations:
4
+
5
+ 1. **Bundled read-only data** inside the wheel — test cases, chrome extension,
6
+ dockerfile set, personal-info templates. Always accessed via
7
+ :func:`bundled_data_dir` (returns a real ``Path`` so it can be handed to
8
+ subprocess / ``docker build`` without further juggling).
9
+
10
+ 2. **User config** — per-user mutable state. Chosen via :mod:`platformdirs` so
11
+ macOS gets ``~/Library/Application Support/claw-bench`` and Linux gets
12
+ ``~/.config/claw-bench`` (respecting ``XDG_CONFIG_HOME``). Contains
13
+ ``models.yaml``, ``config.json``, optional ``secrets.env``.
14
+
15
+ 3. **Output directory** — where run artifacts land. Defaults to
16
+ ``./claw-output/`` in the caller's current directory, overridable via
17
+ ``--output-dir`` or ``CLAWBENCH_OUTPUT_DIR``.
18
+
19
+ We also migrate from the pre-package legacy dir ``~/.config/clawbench/`` on
20
+ first access so users coming from source installs keep their preferences.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import os
26
+ import shutil
27
+ from importlib import resources
28
+ from pathlib import Path
29
+
30
+ from platformdirs import PlatformDirs
31
+
32
+ _APP_NAME = "claw-bench"
33
+ _LEGACY_CONFIG_DIR = Path.home() / ".config" / "clawbench"
34
+
35
+ _dirs = PlatformDirs(_APP_NAME, appauthor=False)
36
+
37
+
38
def bundled_data_dir() -> Path:
    """Locate the ``data`` directory shipped inside the ``clawbench`` package.

    The location comes from ``importlib.resources.files("clawbench")`` and is
    converted to a plain ``Path`` via its string form, because consumers such
    as the ``docker build`` context and ``--load-extension`` need a real
    directory rather than a ``Traversable``.
    """
    package_root = resources.files("clawbench")
    data_root = package_root / "data"
    # ``files()`` may hand back a MultiplexedPath for namespace packages; for
    # single-package layouts it is already a PosixPath/WindowsPath.
    return Path(str(data_root))
50
+
51
+
52
def test_cases_dir() -> Path:
    """Return the ``test-cases`` directory under the bundled data dir."""
    return bundled_data_dir().joinpath("test-cases")
54
+
55
+
56
def chrome_extension_dir() -> Path:
    """Return the ``chrome-extension`` directory under the bundled data dir."""
    return bundled_data_dir().joinpath("chrome-extension")
58
+
59
+
60
def extension_server_dir() -> Path:
    """Return the ``extension-server`` directory under the bundled data dir."""
    return bundled_data_dir().joinpath("extension-server")
62
+
63
+
64
def shared_dir() -> Path:
    """Return the ``shared`` directory under the bundled data dir."""
    return bundled_data_dir().joinpath("shared")
66
+
67
+
68
def docker_build_dir() -> Path:
    """Return the bundled ``docker`` directory.

    It holds the Dockerfile, entrypoint.sh and setup-openclaw.sh.
    """
    return bundled_data_dir().joinpath("docker")
71
+
72
+
73
def bundled_models_yaml() -> Path:
    """Return the path of the bundled ``models.example.yaml`` seed template.

    This is the file copied into the user config dir on first run. The
    *example* file is shipped on purpose rather than the developer's live
    ``models.yaml``: the live file in the repo may contain real API keys
    (OpenRouter et al.) committed for local convenience, and those must not
    land on PyPI where every wheel is permanently indexed.
    """
    return bundled_data_dir().joinpath("models", "models.example.yaml")
81
+
82
+
83
def user_config_dir() -> Path:
    """Return the per-user config directory, creating it when absent.

    Every call also runs the best-effort legacy-config migration against
    the directory before returning it.
    """
    config_root = Path(_dirs.user_config_dir)
    config_root.mkdir(parents=True, exist_ok=True)
    _migrate_legacy_config(config_root)
    return config_root
89
+
90
+
91
def user_models_yaml() -> Path:
    """Return the user's editable ``models.yaml``, creating it if needed.

    When the file is missing it is copied from the bundled template; if the
    template itself is absent a minimal stub is written instead, so the
    returned path always exists for the TUI editor.
    """
    target = user_config_dir() / "models.yaml"
    if target.exists():
        return target

    template = bundled_models_yaml()
    if not template.exists():
        target.write_text("# ClawBench models.yaml\nmodels: {}\n", encoding="utf-8")
    else:
        shutil.copyfile(template, target)
    return target
102
+
103
+
104
def user_config_json() -> Path:
    """Return the path of ``config.json`` (TUI preferences: theme, last-used options)."""
    return user_config_dir().joinpath("config.json")
107
+
108
+
109
def user_secrets_path() -> Path:
    """Return the path of the optional ``secrets.env`` file (PURELYMAIL_API_KEY etc).

    This function only computes the path; it never creates the file. The
    CLI's ``configure --secrets`` writes it with chmod 600, and ``run`` /
    ``batch`` load it via python-dotenv if present.
    """
    return user_config_dir().joinpath("secrets.env")
116
+
117
+
118
def default_output_dir() -> Path:
    """Pick the directory where run artifacts are written by default.

    A non-empty ``CLAWBENCH_OUTPUT_DIR`` environment variable wins and is
    returned with ``~`` expanded and the path resolved. Otherwise the result
    is ``claw-output`` under the current working directory.
    """
    override = os.environ.get("CLAWBENCH_OUTPUT_DIR")
    if not override:
        return Path.cwd() / "claw-output"
    return Path(override).expanduser().resolve()
129
+
130
+
131
def _migrate_legacy_config(new_dir: Path) -> None:
    """Copy settings from the legacy ``~/.config/clawbench/`` dir into ``new_dir``.

    Only files missing at the new location are copied, and the legacy
    directory is never modified, so source installs keep working. The
    legacy ``tui.json`` is stored as ``config.json`` at the new location.
    Copy errors are ignored because migration is best-effort.
    """
    legacy = _LEGACY_CONFIG_DIR
    if not (legacy.is_dir() and new_dir != legacy):
        return

    # (legacy filename, filename at the new location). ``tui.json`` is tried
    # first, so once it has been copied a legacy ``config.json`` is skipped.
    renames = (
        ("tui.json", "config.json"),
        ("config.json", "config.json"),
        ("models.yaml", "models.yaml"),
    )
    for old_name, new_name in renames:
        source = legacy / old_name
        target = new_dir / new_name
        if not source.exists() or target.exists():
            continue
        try:
            shutil.copyfile(source, target)
        except OSError:
            # Migration is best-effort; the CLI still works without it.
            pass
@@ -0,0 +1,104 @@
1
+ {
2
+ "// NOTE": "All overlapping fields MUST match personal_info.json exactly.",
3
+ "header": {
4
+ "name": "Alex Green",
5
+ "title": "Senior Software Engineer",
6
+ "email": "dummy_email",
7
+ "location": "Toronto, ON, Canada"
8
+ },
9
+ "summary": "Senior Software Engineer with 23+ years of experience in full-stack development, distributed systems, and cloud infrastructure. PhD in Computer Science from the University of Toronto. Currently leading a backend team at Pinecrest Technologies Inc., building enterprise data pipeline solutions. Previously built real-time transaction processing systems in FinTech. AWS and Kubernetes certified.",
10
+ "experience": [
11
+ {
12
+ "title": "Senior Software Engineer",
13
+ "company": "Pinecrest Technologies Inc.",
14
+ "location": "Toronto, ON",
15
+ "dates": "Mar 2019 – Present",
16
+ "bullets": [
17
+ "Lead backend team of 5 engineers building distributed data pipelines for enterprise SaaS platform",
18
+ "Design and implement RESTful APIs serving 2M+ daily requests with sub-100ms p99 latency",
19
+ "Mentor junior developers and conduct code reviews, improving team velocity by 30%"
20
+ ]
21
+ },
22
+ {
23
+ "title": "Software Engineer",
24
+ "company": "Crestridge Digital Corp.",
25
+ "location": "Toronto, ON",
26
+ "dates": "Jun 2012 - Feb 2019",
27
+ "bullets": [
28
+ "Developed real-time transaction processing systems handling $50M+ daily volume in FinTech",
29
+ "Built automated testing frameworks reducing QA cycle by 40%",
30
+ "Collaborated with product team on mobile banking features serving 500K+ users"
31
+ ]
32
+ },
33
+ {
34
+ "title": "Software Developer",
35
+ "company": "Cedarbrook Solutions Ltd.",
36
+ "location": "Toronto, ON",
37
+ "dates": "Sep 2002 - May 2012",
38
+ "bullets": [
39
+ "Full-stack web development for enterprise clients across multiple industries",
40
+ "Database administration and performance optimization for high-traffic applications",
41
+ "Part-time during graduate studies (2002-2010); full-time from 2010"
42
+ ]
43
+ }
44
+ ],
45
+ "education": [
46
+ {
47
+ "degree": "Ph.D. in Computer Science",
48
+ "institution": "University of Toronto",
49
+ "dates": "2004 - 2010",
50
+ "detail": "Dissertation: Scalable Real-Time Data Pipeline Architectures for High-Throughput Transaction Processing"
51
+ },
52
+ {
53
+ "degree": "M.Sc. in Computer Science",
54
+ "institution": "University of Toronto",
55
+ "dates": "2002 - 2004",
56
+ "detail": "GPA: 3.8/4.0 | Thesis: Efficient Query Processing in Distributed Database Systems"
57
+ },
58
+ {
59
+ "degree": "B.Sc. in Computer Science",
60
+ "institution": "University of Toronto",
61
+ "dates": "1998 - 2002",
62
+ "detail": "GPA: 3.6/4.0 | Dean's List (2001, 2002)"
63
+ }
64
+ ],
65
+ "skills": {
66
+ "languages": [
67
+ "Python",
68
+ "Java",
69
+ "TypeScript",
70
+ "Go"
71
+ ],
72
+ "databases": [
73
+ "PostgreSQL",
74
+ "Redis"
75
+ ],
76
+ "cloud_devops": [
77
+ "AWS",
78
+ "Docker",
79
+ "Kubernetes",
80
+ "Terraform",
81
+ "CI/CD"
82
+ ],
83
+ "frameworks": [
84
+ "React",
85
+ "Node.js",
86
+ "GraphQL",
87
+ "REST API Design"
88
+ ]
89
+ },
90
+ "certifications": [
91
+ "AWS Solutions Architect – Associate (2024)",
92
+ "Certified Kubernetes Administrator – CKA (2025)"
93
+ ],
94
+ "languages": [
95
+ {
96
+ "language": "English",
97
+ "proficiency": "Native"
98
+ },
99
+ {
100
+ "language": "French",
101
+ "proficiency": "Intermediate (B1)"
102
+ }
103
+ ]
104
+ }