clawbench-cli 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clawbench/__init__.py +35 -0
- clawbench/__main__.py +8 -0
- clawbench/batch.py +619 -0
- clawbench/cli.py +397 -0
- clawbench/data/chrome-extension/README.md +127 -0
- clawbench/data/chrome-extension/background.js +50 -0
- clawbench/data/chrome-extension/content.js +70 -0
- clawbench/data/chrome-extension/manifest.json +25 -0
- clawbench/data/chrome-extension/setup.sh +27 -0
- clawbench/data/chrome-extension/stealth.js +200 -0
- clawbench/data/docker/Dockerfile +51 -0
- clawbench/data/docker/entrypoint.sh +394 -0
- clawbench/data/docker/setup-openclaw.sh +112 -0
- clawbench/data/eval/README.md +95 -0
- clawbench/data/eval/agentic_eval.md +53 -0
- clawbench/data/extension-server/.python-version +1 -0
- clawbench/data/extension-server/README.md +54 -0
- clawbench/data/extension-server/pyproject.toml +7 -0
- clawbench/data/extension-server/server.py +360 -0
- clawbench/data/extension-server/uv.lock +644 -0
- clawbench/data/models/model.schema.json +44 -0
- clawbench/data/models/models.example.yaml +16 -0
- clawbench/data/shared/alex_green_personal_info.json +451 -0
- clawbench/data/test-cases/001-daily-life-food-uber-eats/task.json +25 -0
- clawbench/data/test-cases/002-daily-life-food-doordash/task.json +25 -0
- clawbench/data/test-cases/004-daily-life-food-instacart/extra_info/grocery_list.json +36 -0
- clawbench/data/test-cases/004-daily-life-food-instacart/task.json +30 -0
- clawbench/data/test-cases/006-daily-life-food-uber-eats/task.json +24 -0
- clawbench/data/test-cases/007-daily-life-food-instacart/extra_info/meal_plan.json +21 -0
- clawbench/data/test-cases/007-daily-life-food-instacart/task.json +30 -0
- clawbench/data/test-cases/011-daily-life-housing-zillow/task.json +25 -0
- clawbench/data/test-cases/015-daily-life-housing-craigslist/extra_info/listing_details.json +26 -0
- clawbench/data/test-cases/015-daily-life-housing-craigslist/task.json +30 -0
- clawbench/data/test-cases/035-daily-life-health-medical-betterhelp/task.json +25 -0
- clawbench/data/test-cases/041-daily-life-pets-rover/task.json +25 -0
- clawbench/data/test-cases/043-daily-life-pets-rover/extra_info/pet_info.json +12 -0
- clawbench/data/test-cases/043-daily-life-pets-rover/task.json +30 -0
- clawbench/data/test-cases/045-daily-life-personal-care-booksy/task.json +25 -0
- clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/extra_info/address_info.json +7 -0
- clawbench/data/test-cases/047-daily-life-personal-care-taskrabbit/task.json +30 -0
- clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/extra_info/job_links.json +5 -0
- clawbench/data/test-cases/086-job-search-hr-cv-autofill-greenhouse-meta/task.json +30 -0
- clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/extra_info/job_links.json +5 -0
- clawbench/data/test-cases/089-job-search-hr-cv-autofill-simplify-jobs/task.json +30 -0
- clawbench/data/test-cases/091-job-search-hr-job-apply-indeed/task.json +25 -0
- clawbench/data/test-cases/120-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
- clawbench/data/test-cases/121-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
- clawbench/data/test-cases/128-office-secretary-tasks-email-mgmt-purelymail/task.json +28 -0
- clawbench/data/test-cases/134-office-secretary-tasks-calendar-calendly/task.json +25 -0
- clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/extra_info/meeting_details.json +30 -0
- clawbench/data/test-cases/137-office-secretary-tasks-calendar-doodle/task.json +30 -0
- clawbench/data/test-cases/139-office-secretary-tasks-calendar-calendly/task.json +25 -0
- clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/extra_info/task_list.json +29 -0
- clawbench/data/test-cases/142-office-secretary-tasks-collab-trello/task.json +30 -0
- clawbench/data/test-cases/179-dev-tech-github-ops-github/extra_info/config.json +13 -0
- clawbench/data/test-cases/179-dev-tech-github-ops-github/task.json +30 -0
- clawbench/data/test-cases/180-dev-tech-github-ops-github/task.json +25 -0
- clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/extra_info/raw_results.json +47 -0
- clawbench/data/test-cases/215-academia-research-paper-tables-overleaf/task.json +30 -0
- clawbench/data/test-cases/242-academia-research-research-tools-overleaf/task.json +25 -0
- clawbench/data/test-cases/246-academia-research-research-tools-zotero/task.json +25 -0
- clawbench/data/test-cases/247-academia-research-research-tools-semantic-scholar/task.json +25 -0
- clawbench/data/test-cases/265-education-learning-general-coursera/task.json +25 -0
- clawbench/data/test-cases/266-education-learning-general-leetcode/extra_info/solution_code.py +9 -0
- clawbench/data/test-cases/266-education-learning-general-leetcode/task.json +30 -0
- clawbench/data/test-cases/273-education-learning-general-edx/task.json +25 -0
- clawbench/data/test-cases/274-education-learning-general-udemy/task.json +25 -0
- clawbench/data/test-cases/279-travel-general-airbnb/task.json +25 -0
- clawbench/data/test-cases/280-travel-general-booking-com/task.json +25 -0
- clawbench/data/test-cases/363-entertainment-hobbies-general-ticketmaster/task.json +25 -0
- clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/extra_info/book_list.json +14 -0
- clawbench/data/test-cases/369-entertainment-hobbies-general-goodreads/task.json +30 -0
- clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/extra_info/event_details.json +10 -0
- clawbench/data/test-cases/372-entertainment-hobbies-general-eventbrite/task.json +30 -0
- clawbench/data/test-cases/403-personal-management-account-security-1password-web/extra_info/credentials.json +34 -0
- clawbench/data/test-cases/403-personal-management-account-security-1password-web/task.json +30 -0
- clawbench/data/test-cases/413-personal-management-personal-tools-todoist/extra_info/task_list.json +52 -0
- clawbench/data/test-cases/413-personal-management-personal-tools-todoist/task.json +30 -0
- clawbench/data/test-cases/468-rating-voting-general-glassdoor/extra_info/interview_experience.json +10 -0
- clawbench/data/test-cases/468-rating-voting-general-glassdoor/task.json +30 -0
- clawbench/data/test-cases/469-rating-voting-general-tripadvisor/extra_info/review_content.json +6 -0
- clawbench/data/test-cases/469-rating-voting-general-tripadvisor/task.json +30 -0
- clawbench/data/test-cases/470-rating-voting-general-trustpilot/extra_info/review_content.json +6 -0
- clawbench/data/test-cases/470-rating-voting-general-trustpilot/task.json +30 -0
- clawbench/data/test-cases/474-rating-voting-general-capterra/task.json +25 -0
- clawbench/data/test-cases/475-rating-voting-general-g2/task.json +25 -0
- clawbench/data/test-cases/482-creation-init-general-confluence/extra_info/content.json +3 -0
- clawbench/data/test-cases/482-creation-init-general-confluence/task.json +30 -0
- clawbench/data/test-cases/483-creation-init-general-airtable/task.json +25 -0
- clawbench/data/test-cases/484-creation-init-general-clickup/task.json +28 -0
- clawbench/data/test-cases/485-creation-init-general-webflow/task.json +25 -0
- clawbench/data/test-cases/486-creation-init-general-mailchimp/extra_info/content.json +3 -0
- clawbench/data/test-cases/486-creation-init-general-mailchimp/task.json +30 -0
- clawbench/data/test-cases/487-creation-init-general-typeform/extra_info/survey_questions.json +85 -0
- clawbench/data/test-cases/487-creation-init-general-typeform/task.json +30 -0
- clawbench/data/test-cases/488-creation-init-general-substack/extra_info/content.json +3 -0
- clawbench/data/test-cases/488-creation-init-general-substack/task.json +30 -0
- clawbench/data/test-cases/489-creation-init-general-ghost/extra_info/content.json +3 -0
- clawbench/data/test-cases/489-creation-init-general-ghost/task.json +30 -0
- clawbench/data/test-cases/501-creation-init-general-asana/extra_info/project_description.json +8 -0
- clawbench/data/test-cases/501-creation-init-general-asana/task.json +33 -0
- clawbench/data/test-cases/529-daily-life-shopping-delivery-king-arthur-baking/task.json +25 -0
- clawbench/data/test-cases/533-daily-life-utilities-inmyarea/task.json +25 -0
- clawbench/data/test-cases/535-daily-life-home-home-depot/task.json +25 -0
- clawbench/data/test-cases/537-daily-life-food-crumbl/task.json +25 -0
- clawbench/data/test-cases/539-daily-life-health-jefit/task.json +25 -0
- clawbench/data/test-cases/542-daily-life-pets-wag/task.json +25 -0
- clawbench/data/test-cases/551-finance-investment-crypto-wallet-trezor/task.json +25 -0
- clawbench/data/test-cases/552-finance-investment-business-payment-plooto/task.json +25 -0
- clawbench/data/test-cases/555-finance-investment-insurance-insureon/task.json +25 -0
- clawbench/data/test-cases/559-finance-investment-crowdfunding-frontfundr/task.json +25 -0
- clawbench/data/test-cases/564-daily-life-event-registration-race-roster/task.json +25 -0
- clawbench/data/test-cases/565-job-search-hr-job-search-jopwell/task.json +25 -0
- clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/extra_info/listing_details.json +26 -0
- clawbench/data/test-cases/566-job-search-hr-job-search-ziprecruiter/task.json +30 -0
- clawbench/data/test-cases/569-job-search-hr-job-search-careerbuilder/task.json +25 -0
- clawbench/data/test-cases/570-job-search-hr-job-search-hired/task.json +25 -0
- clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/extra_info/listing_details.json +26 -0
- clawbench/data/test-cases/571-job-search-hr-recruitment-mgmt-workable/task.json +30 -0
- clawbench/data/test-cases/576-office-secretary-tasks-reports-ftc-reportfraud/task.json +25 -0
- clawbench/data/test-cases/583-office-secretary-tasks-support-tickets-freshdesk/task.json +25 -0
- clawbench/data/test-cases/598-academia-research-legal-docs-formswift/task.json +25 -0
- clawbench/data/test-cases/606-education-learning-kids-courses-outschool/task.json +25 -0
- clawbench/data/test-cases/607-education-learning-art-courses-creativebug/task.json +25 -0
- clawbench/data/test-cases/609-education-learning-meditation-spirit-rock-meditation-center/task.json +25 -0
- clawbench/data/test-cases/615-travel-flights-spirit-airlines/task.json +25 -0
- clawbench/data/test-cases/618-travel-train-bus-12go-asia/task.json +25 -0
- clawbench/data/test-cases/625-travel-camping-outdoor-parks-canada-reservations/task.json +25 -0
- clawbench/data/test-cases/626-travel-bus-flixbus/task.json +25 -0
- clawbench/data/test-cases/627-travel-flights-momondo/task.json +25 -0
- clawbench/data/test-cases/632-shopping-commerce-beauty-care-olaplex/task.json +25 -0
- clawbench/data/test-cases/634-shopping-commerce-apparel-dooney-bourke/task.json +25 -0
- clawbench/data/test-cases/635-shopping-commerce-gifts-uncommon-goods/task.json +25 -0
- clawbench/data/test-cases/636-shopping-commerce-auto-parts-rockauto/task.json +25 -0
- clawbench/data/test-cases/638-shopping-commerce-print-custom-vistaprint/task.json +25 -0
- clawbench/data/test-cases/639-shopping-commerce-luxury-mansur-gavriel/task.json +25 -0
- clawbench/data/test-cases/671-entertainment-gaming-humble-bundle/task.json +25 -0
- clawbench/data/test-cases/672-entertainment-hobbies-anime-streaming-crunchyroll/task.json +25 -0
- clawbench/data/test-cases/674-entertainment-hobbies-masterclass-masterclass/task.json +25 -0
- clawbench/data/test-cases/676-government-civic-legal-docs-legalnature/task.json +25 -0
- clawbench/data/test-cases/685-personal-management-budget-mgmt-everydollar/task.json +25 -0
- clawbench/data/test-cases/687-personal-management-vpn-subscription-ipvanish/task.json +25 -0
- clawbench/data/test-cases/688-personal-management-insurance-compare-insurify/task.json +25 -0
- clawbench/data/test-cases/695-automation-workflows-recurring-order-stumptown-coffee/task.json +25 -0
- clawbench/data/test-cases/697-automation-workflows-recurring-order-bean-box/task.json +25 -0
- clawbench/data/test-cases/699-automation-workflows-recurring-order-mistobox/task.json +25 -0
- clawbench/data/test-cases/700-deletion-revocation-data-deletion-deleteme/task.json +25 -0
- clawbench/data/test-cases/705-rating-voting-wine-review-vivino/task.json +25 -0
- clawbench/data/test-cases/706-rating-voting-beer-review-beeradvocate/task.json +25 -0
- clawbench/data/test-cases/707-rating-voting-social-wine-untappd/task.json +25 -0
- clawbench/data/test-cases/708-rating-voting-professor-review-ratemyprofessors/task.json +28 -0
- clawbench/data/test-cases/709-rating-voting-service-review-angi/task.json +25 -0
- clawbench/data/test-cases/710-creation-init-interior-design-roomsketcher/task.json +25 -0
- clawbench/data/test-cases/711-creation-init-color-design-coolors/task.json +25 -0
- clawbench/data/test-cases/712-creation-init-website-create-squarespace/task.json +25 -0
- clawbench/data/test-cases/713-creation-init-website-build-wix/task.json +25 -0
- clawbench/data/test-cases/735-home-services-maintenance-house-cleaning-bark/task.json +25 -0
- clawbench/data/test-cases/736-home-services-maintenance-plumbing-ace-hardware/task.json +25 -0
- clawbench/data/test-cases/737-home-services-maintenance-kitchen-remodel-lowes/task.json +25 -0
- clawbench/data/test-cases/738-home-services-maintenance-equipment-install-amazon-home-services/task.json +25 -0
- clawbench/data/test-cases/750-automotive-vehicle-services-car-insurance-compare-kanetix/task.json +25 -0
- clawbench/data/test-cases/751-automotive-vehicle-services-car-lease-sixt/task.json +25 -0
- clawbench/data/test-cases/754-automotive-vehicle-services-used-car-listing-autotrader/task.json +25 -0
- clawbench/data/test-cases/763-automotive-vehicle-services-car-lease-autoslash/task.json +25 -0
- clawbench/data/test-cases/766-nonprofit-charity-donation-doctors-without-borders-msf/task.json +25 -0
- clawbench/data/test-cases/768-nonprofit-charity-community-crowdfund-ioby/task.json +25 -0
- clawbench/data/test-cases/770-nonprofit-charity-volunteer-apply-on-make-a-wish-foundation-website-complete-and-submit-a-volunteer-application-form-selecting-the-wish-granter-role-and-entering-city-phoenix-az/task.json +25 -0
- clawbench/data/test-cases/774-nonprofit-charity-nonprofit-job-apply-charity-village/task.json +25 -0
- clawbench/data/test-cases/776-nonprofit-charity-volunteer-signup-idealist/task.json +25 -0
- clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/extra_info/payment_info.json +3 -0
- clawbench/data/test-cases/778-nonprofit-charity-donation-globalgiving/task.json +30 -0
- clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/extra_info/address_info.json +4 -0
- clawbench/data/test-cases/780-beauty-personal-care-skincare-purchase-soko-glam/task.json +30 -0
- clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/extra_info/email_info.json +3 -0
- clawbench/data/test-cases/781-beauty-personal-care-beauty-booking-bluemercury/task.json +30 -0
- clawbench/data/test-cases/782-beauty-personal-care-skincare-purchase-paulas-choice/task.json +24 -0
- clawbench/data/test-cases/783-beauty-personal-care-beauty-booking-ulta-beauty/task.json +24 -0
- clawbench/data/test-cases/785-beauty-personal-care-skincare-curology/task.json +25 -0
- clawbench/data/test-cases/788-beauty-personal-care-makeup-the-ordinary/task.json +25 -0
- clawbench/data/test-cases/789-beauty-personal-care-makeup-fenty-beauty/task.json +25 -0
- clawbench/data/test-cases/793-beauty-personal-care-beauty-retail-mac-cosmetics/task.json +25 -0
- clawbench/data/test-cases/794-beauty-personal-care-salon-booking-styleseat/task.json +25 -0
- clawbench/data/test-cases/795-pet-animal-care-pet-adoption-aspca/task.json +25 -0
- clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/extra_info/pet_info.json +12 -0
- clawbench/data/test-cases/796-pet-animal-care-pet-supplies-grooming-petsmart/task.json +30 -0
- clawbench/data/test-cases/799-pet-animal-care-pet-insurance-aspca-pet-health-insurance/task.json +25 -0
- clawbench/data/test-cases/801-pet-animal-care-pet-friendly-travel-bringfido/task.json +25 -0
- clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/extra_info/pet_info.json +12 -0
- clawbench/data/test-cases/803-pet-animal-care-pet-medical-pawp/task.json +30 -0
- clawbench/data/test-cases/807-pet-animal-care-pet-dna-embark/task.json +25 -0
- clawbench/data/test-cases/809-pet-animal-care-pet-adopt-petfinder/task.json +28 -0
- clawbench/data/test-cases/812-pet-animal-care-pet-subscription-ollie/task.json +25 -0
- clawbench/data/test-cases/815-personal-management-records-mgmt-myheritage/task.json +25 -0
- clawbench/data/test-cases/821-education-learning-reading-self-study-blinkist/task.json +25 -0
- clawbench/data/test-cases/861-entertainment-hobbies-movies-cineplex/task.json +25 -0
- clawbench/data/test-cases/862-entertainment-hobbies-movies-amc-theatres/task.json +25 -0
- clawbench/data/test-cases/864-entertainment-hobbies-show-tickets-ticketmaster/task.json +25 -0
- clawbench/data/test-cases/865-travel-outdoor-hipcamp/task.json +25 -0
- clawbench/data/test-cases/867-entertainment-hobbies-movies-fandango/task.json +25 -0
- clawbench/data/test-cases/872-daily-life-food-opentable/task.json +25 -0
- clawbench/data/test-cases/873-daily-life-food-resy/task.json +28 -0
- clawbench/data/test-cases/876-entertainment-hobbies-show-tickets-vivid-seats/task.json +25 -0
- clawbench/data/test-cases/877-entertainment-hobbies-show-tickets-stubhub/task.json +25 -0
- clawbench/data/test-cases/878-travel-outdoor-ontario-parks/task.json +25 -0
- clawbench/data/test-cases/883-education-learning-hobby-class-sur-la-table/task.json +25 -0
- clawbench/data/test-cases/884-entertainment-hobbies-experience-breakout-games/task.json +25 -0
- clawbench/data/test-cases/885-entertainment-hobbies-experience-bowlero/task.json +25 -0
- clawbench/data/test-cases/886-entertainment-hobbies-experience-topgolf/task.json +25 -0
- clawbench/data/test-cases/lite.json +226 -0
- clawbench/data/test-cases/lite.schema.json +105 -0
- clawbench/data/test-cases/task.schema.json +132 -0
- clawbench/data/tools/build_clawbench_lite_enc.py +161 -0
- clawbench/doctor.py +171 -0
- clawbench/engine.py +180 -0
- clawbench/generate_resume_pdf.py +140 -0
- clawbench/hf_upload.py +78 -0
- clawbench/image.py +127 -0
- clawbench/paths.py +150 -0
- clawbench/resume_template.json +104 -0
- clawbench/run.py +942 -0
- clawbench/tui.py +1401 -0
- clawbench_cli-0.1.2.dist-info/METADATA +770 -0
- clawbench_cli-0.1.2.dist-info/RECORD +226 -0
- clawbench_cli-0.1.2.dist-info/WHEEL +4 -0
- clawbench_cli-0.1.2.dist-info/entry_points.txt +4 -0
- clawbench_cli-0.1.2.dist-info/licenses/LICENSE +201 -0
clawbench/hf_upload.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Optional HuggingFace dataset upload for ClawBench runs."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def hf_upload_enabled(env: dict[str, str]) -> bool:
    """Report whether HuggingFace upload is configured.

    Upload counts as enabled only when both ``HF_TOKEN`` and ``HF_REPO_ID``
    are present in ``env`` with truthy (non-empty) values.
    """
    required_keys = ("HF_TOKEN", "HF_REPO_ID")
    return all(bool(env.get(key)) for key in required_keys)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def upload_run(output_dir: Path, repo_path_prefix: str, env: dict[str, str]) -> None:
    """Upload a run's output directory to HuggingFace and record a marker.

    Best-effort: failures (missing credentials, missing ``huggingface_hub``,
    upload errors) are reported as printed warnings and are not raised.

    On success an ``uploaded.json`` marker holding the repo id, the path in
    the repo, the commit URL and a UTC timestamp is written into
    ``output_dir``. This function does not delete or modify any other local
    file, including ``data/``.

    Args:
        output_dir: Local output path (contains run-meta.json, data/).
        repo_path_prefix: Path inside the HF repo, e.g. "model/case-model-ts".
        env: Dict with HF_TOKEN and HF_REPO_ID.
    """
    # Validate credentials up front so a missing key produces a warning like
    # every other failure here, instead of an uncaught KeyError.
    token = env.get("HF_TOKEN")
    repo_id = env.get("HF_REPO_ID")
    if not token or not repo_id:
        print(" WARNING: HF_TOKEN / HF_REPO_ID not set, skipping upload")
        return

    try:
        from huggingface_hub import HfApi
    except ImportError:
        print(" WARNING: huggingface_hub not installed, skipping upload")
        return

    api = HfApi(token=token)

    try:
        commit_info = api.upload_folder(
            folder_path=str(output_dir),
            repo_id=repo_id,
            repo_type="dataset",
            path_in_repo=repo_path_prefix,
            # Scratch copy of personal info must never leave the machine.
            ignore_patterns=[".my-info-tmp/**"],
            commit_message=f"Add run: {repo_path_prefix}",
        )
        commit_url = getattr(commit_info, "commit_url", None) or ""
        print(f" Uploaded to HF: {repo_id}/{repo_path_prefix}")

        # Leave a lightweight marker next to the run so later tooling can
        # tell that (and where) this run was uploaded.
        marker = {
            "repo_id": repo_id,
            "path_in_repo": repo_path_prefix,
            "commit_url": commit_url,
            "uploaded_at": datetime.now(timezone.utc).isoformat(),
        }
        (output_dir / "uploaded.json").write_text(
            json.dumps(marker, indent=2), encoding="utf-8"
        )

    except Exception as e:
        print(f" WARNING: HuggingFace upload failed: {e}")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def upload_file(local_path: Path, path_in_repo: str, env: dict[str, str]) -> None:
    """Upload a single file to HuggingFace (e.g. batch-summary.json).

    Best-effort: failures (missing credentials, missing ``huggingface_hub``,
    upload errors) are reported as printed warnings and are not raised.

    Args:
        local_path: File on disk to upload.
        path_in_repo: Destination path inside the dataset repo.
        env: Dict with HF_TOKEN and HF_REPO_ID.
    """
    # Validate credentials up front so a missing key produces a warning like
    # every other failure here, instead of an uncaught KeyError.
    token = env.get("HF_TOKEN")
    repo_id = env.get("HF_REPO_ID")
    if not token or not repo_id:
        print(" WARNING: HF_TOKEN / HF_REPO_ID not set, skipping upload")
        return

    try:
        from huggingface_hub import HfApi
    except ImportError:
        print(" WARNING: huggingface_hub not installed, skipping upload")
        return

    api = HfApi(token=token)

    try:
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"Add {path_in_repo}",
        )
        print(f" Uploaded to HF: {repo_id}/{path_in_repo}")
    except Exception as e:
        print(f" WARNING: HuggingFace upload failed: {e}")
|
clawbench/image.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Container image lifecycle helpers.
|
|
2
|
+
|
|
3
|
+
The single responsibility here is answering the question: *is the
|
|
4
|
+
``clawbench`` image available locally and is it the right version?* —
|
|
5
|
+
pulling from the registry when it isn't, and falling back to a local
|
|
6
|
+
build when pull fails (offline, rate-limited, arch mismatch).
|
|
7
|
+
|
|
8
|
+
Why pull-first, build-fallback:
|
|
9
|
+
|
|
10
|
+
- A first-time ``docker build`` takes 5-10 minutes on a fresh system.
|
|
11
|
+
For users who just typed ``pip install claw-bench``, that is an awful
|
|
12
|
+
first impression. A prebuilt image on GHCR is an order of magnitude
|
|
13
|
+
faster and already exists on the release pipeline.
|
|
14
|
+
- But pulls can fail in ways builds cannot (behind an enterprise proxy,
|
|
15
|
+
no GHCR auth, unsupported arch). Silently falling back to build keeps
|
|
16
|
+
the package usable in those environments instead of hard-erroring.
|
|
17
|
+
|
|
18
|
+
Version-label check:
|
|
19
|
+
|
|
20
|
+
- The release CI tags images with ``LABEL org.clawbench.version=<v>``
|
|
21
|
+
matching the pypi version. We warn loudly (but keep going) if the
|
|
22
|
+
local image's label diverges from ``clawbench.__version__`` — the
|
|
23
|
+
single most common post-release footgun is "works locally because I
|
|
24
|
+
have a stale hand-built image that nobody else has."
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import subprocess
|
|
30
|
+
|
|
31
|
+
from clawbench import __version__
|
|
32
|
+
from clawbench.engine import detect_engine
|
|
33
|
+
|
|
34
|
+
IMAGE_NAME = "clawbench"
|
|
35
|
+
REGISTRY_REF = "ghcr.io/reacher-z/claw-bench"
|
|
36
|
+
VERSION_LABEL = "org.clawbench.version"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _engine_or_fail() -> str:
    """Return the container engine reported by ``detect_engine()``.

    Raises:
        RuntimeError: ``detect_engine()`` returned ``None``.
    """
    detected = detect_engine()
    if detected is not None:
        return detected
    message = (
        "No container engine (podman or docker) found on PATH. "
        "Install podman: https://podman.io/docs/installation"
    )
    raise RuntimeError(message)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def image_exists(engine: str | None = None, ref: str = IMAGE_NAME) -> bool:
    """Tell whether ``ref`` can be inspected in the local image store.

    Runs ``<engine> image inspect <ref>`` with its output captured and
    reports whether the command exited with status 0. When ``engine`` is
    not given, the engine is resolved via ``_engine_or_fail()``.
    """
    runtime = engine if engine else _engine_or_fail()
    inspect_cmd = [runtime, "image", "inspect", ref]
    completed = subprocess.run(inspect_cmd, capture_output=True)
    return completed.returncode == 0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def image_label(engine: str | None = None, ref: str = IMAGE_NAME) -> str | None:
    """Read the ``VERSION_LABEL`` label of the local image ``ref``.

    Returns the stripped label text, or ``None`` when the inspect command
    fails or prints nothing (image missing or label unset).
    """
    runtime = engine if engine else _engine_or_fail()
    # Go template that looks the label up by key in the image config.
    template = '{{ index .Config.Labels "' + VERSION_LABEL + '" }}'
    completed = subprocess.run(
        [runtime, "image", "inspect", "--format", template, ref],
        capture_output=True,
        text=True,
    )
    if completed.returncode != 0:
        return None
    value = completed.stdout.strip()
    return value if value else None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def pull_image(
    engine: str | None = None,
    tag: str | None = None,
) -> tuple[bool, str]:
    """Pull ``<REGISTRY_REF>:<tag>`` and retag it locally as ``IMAGE_NAME``.

    The retag lets code that refers to the un-prefixed local image name keep
    working after a registry pull.

    ``tag`` falls back to the installed package version when omitted (or
    empty); pass ``"latest"`` explicitly to get the latest image.

    Returns:
        ``(True, "")`` on success. On failure ``(False, detail)``: for a
        failed pull, ``detail`` is the command's stripped stderr (or stdout
        when stderr is empty); for a failed retag, the stripped stderr.
    """
    runtime = engine if engine else _engine_or_fail()
    chosen_tag = tag if tag else __version__
    remote_ref = f"{REGISTRY_REF}:{chosen_tag}"

    pulled = subprocess.run(
        [runtime, "pull", remote_ref],
        capture_output=True,
        text=True,
    )
    if pulled.returncode != 0:
        pull_err = pulled.stderr.strip()
        return False, pull_err if pull_err else pulled.stdout.strip()

    retagged = subprocess.run(
        [runtime, "tag", remote_ref, IMAGE_NAME],
        capture_output=True,
        text=True,
    )
    if retagged.returncode == 0:
        return True, ""
    return False, retagged.stderr.strip()
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def verify_image_version(engine: str | None = None) -> tuple[bool, str]:
    """Compare the local image's version label with the package version.

    Returns ``(matches, detail)``:

    - ``(False, msg)`` when the image is not present locally.
    - ``(True, "")`` when the image carries no version label (unlabeled
      legacy images are accepted) or the label equals ``__version__``.
    - ``(False, msg)`` when the label differs; ``msg`` is user-facing.
    """
    runtime = engine if engine else _engine_or_fail()

    if not image_exists(runtime):
        return False, f"image '{IMAGE_NAME}' not present locally"

    label = image_label(runtime)
    # No label at all means a legacy image; treat it the same as a match.
    if label is None or label == __version__:
        return True, ""

    mismatch = (
        f"image version label '{label}' != package version '{__version__}'. "
        f"Consider `claw-bench build --no-cache` to rebuild."
    )
    return False, mismatch
|
clawbench/paths.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Path helpers for the installed package.
|
|
2
|
+
|
|
3
|
+
Three kinds of locations:
|
|
4
|
+
|
|
5
|
+
1. **Bundled read-only data** inside the wheel — test cases, chrome extension,
|
|
6
|
+
dockerfile set, personal-info templates. Always accessed via
|
|
7
|
+
:func:`bundled_data_dir` (returns a real ``Path`` so it can be handed to
|
|
8
|
+
subprocess / ``docker build`` without further juggling).
|
|
9
|
+
|
|
10
|
+
2. **User config** — per-user mutable state. Chosen via :mod:`platformdirs` so
|
|
11
|
+
macOS gets ``~/Library/Application Support/claw-bench`` and Linux gets
|
|
12
|
+
``~/.config/claw-bench`` (respecting ``XDG_CONFIG_HOME``). Contains
|
|
13
|
+
``models.yaml``, ``config.json``, optional ``secrets.env``.
|
|
14
|
+
|
|
15
|
+
3. **Output directory** — where run artifacts land. Defaults to
|
|
16
|
+
``./claw-output/`` in the caller's current directory, overridable via
|
|
17
|
+
``--output-dir`` or ``CLAWBENCH_OUTPUT_DIR``.
|
|
18
|
+
|
|
19
|
+
We also migrate from the pre-package legacy dir ``~/.config/clawbench/`` on
|
|
20
|
+
first access so users coming from source installs keep their preferences.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import os
|
|
26
|
+
import shutil
|
|
27
|
+
from importlib import resources
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
|
|
30
|
+
from platformdirs import PlatformDirs
|
|
31
|
+
|
|
32
|
+
_APP_NAME = "claw-bench"
|
|
33
|
+
_LEGACY_CONFIG_DIR = Path.home() / ".config" / "clawbench"
|
|
34
|
+
|
|
35
|
+
_dirs = PlatformDirs(_APP_NAME, appauthor=False)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def bundled_data_dir() -> Path:
    """Return the ``data`` directory of the installed ``clawbench`` package.

    The location comes from ``importlib.resources.files("clawbench")`` and is
    converted to a plain ``Path`` (via ``str``) because consumers such as the
    ``docker build`` context and ``--load-extension`` need a real directory
    rather than a ``Traversable``.
    """
    package_root = resources.files("clawbench")
    data_root = package_root / "data"
    # ``files()`` returns a MultiplexedPath in rare cases (namespace packages);
    # for single-package layouts it yields a PosixPath/WindowsPath directly.
    return Path(str(data_root))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_cases_dir() -> Path:
    """Return the bundled ``test-cases`` directory."""
    data_root = bundled_data_dir()
    return data_root / "test-cases"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def chrome_extension_dir() -> Path:
    """Return the bundled ``chrome-extension`` directory."""
    data_root = bundled_data_dir()
    return data_root / "chrome-extension"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def extension_server_dir() -> Path:
    """Return the bundled ``extension-server`` directory."""
    data_root = bundled_data_dir()
    return data_root / "extension-server"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def shared_dir() -> Path:
    """Return the bundled ``shared`` directory."""
    data_root = bundled_data_dir()
    return data_root / "shared"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def docker_build_dir() -> Path:
    """Return the bundled ``docker`` directory.

    Per the packaged file layout it holds Dockerfile, entrypoint.sh and
    setup-openclaw.sh.
    """
    data_root = bundled_data_dir()
    return data_root / "docker"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def bundled_models_yaml() -> Path:
    """Return the path of the bundled ``models.example.yaml`` seed template.

    The *example* file is shipped on purpose instead of the developer's live
    ``models.yaml``: the live file in the repo may contain real API keys
    (OpenRouter et al.) committed for local convenience, and those must not
    land on PyPI where every wheel is permanently indexed.
    """
    models_root = bundled_data_dir() / "models"
    return models_root / "models.example.yaml"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def user_config_dir() -> Path:
    """Return the per-user config directory, creating it when missing.

    The location is taken from the module-level ``PlatformDirs`` instance.
    Every call also runs ``_migrate_legacy_config`` on the directory before
    returning it.
    """
    config_root = Path(_dirs.user_config_dir)
    config_root.mkdir(parents=True, exist_ok=True)
    _migrate_legacy_config(config_root)
    return config_root
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def user_models_yaml() -> Path:
    """Return the path of the user's editable ``models.yaml``.

    When the file does not exist yet it is seeded: copied from the bundled
    template if that exists, otherwise written as a minimal stub. The file
    therefore exists by the time this function returns.
    """
    target = user_config_dir() / "models.yaml"
    if target.exists():
        return target

    template = bundled_models_yaml()
    if not template.exists():
        # No template shipped: fall back to an empty but valid config.
        target.write_text("# ClawBench models.yaml\nmodels: {}\n", encoding="utf-8")
    else:
        shutil.copyfile(template, target)
    return target
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def user_config_json() -> Path:
    """Return the path of ``config.json`` (TUI preferences: theme, last-used options)."""
    config_root = user_config_dir()
    return config_root / "config.json"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def user_secrets_path() -> Path:
    """Return the location of the optional persisted secrets file.

    The file (holding e.g. ``PURELYMAIL_API_KEY``) is not created here.
    The CLI's ``configure --secrets`` writes it with chmod 600, and
    ``run`` / ``batch`` load it via python-dotenv if present.
    """
    config_root = user_config_dir()
    return config_root / "secrets.env"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def default_output_dir() -> Path:
    """Return the directory where run output goes by default.

    Resolution order:
      1. The ``CLAWBENCH_OUTPUT_DIR`` environment variable, when set to a
         non-empty value (``~`` expanded, then resolved to an absolute path).
      2. ``claw-output`` under the caller's current working directory.
    """
    override = os.environ.get("CLAWBENCH_OUTPUT_DIR")
    if not override:
        # Unset or empty: fall back to the working-directory default.
        return Path.cwd() / "claw-output"
    return Path(override).expanduser().resolve()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _migrate_legacy_config(new_dir: Path) -> None:
    """Copy config files from the legacy ``~/.config/clawbench/`` directory.

    Files are copied into ``new_dir`` (the platformdirs location) only when
    no file of the target name exists there yet. The legacy directory is
    never modified, so source installs that still read it keep working.
    Nothing happens when the legacy directory is missing or is the same
    path as ``new_dir``.
    """
    if not _LEGACY_CONFIG_DIR.is_dir():
        return
    if new_dir == _LEGACY_CONFIG_DIR:
        return

    # Legacy filename -> filename at the new location. ``tui.json`` is
    # normalized to ``config.json`` going forward; because it is listed
    # first, it wins over a legacy ``config.json`` when both exist.
    renames = {
        "tui.json": "config.json",
        "config.json": "config.json",
        "models.yaml": "models.yaml",
    }
    for legacy_name, target_name in renames.items():
        legacy_file = _LEGACY_CONFIG_DIR / legacy_name
        if not legacy_file.exists():
            continue
        target_file = new_dir / target_name
        if target_file.exists():
            continue
        try:
            shutil.copyfile(legacy_file, target_file)
        except OSError:
            # Migration is best-effort; the CLI still works without it.
            pass
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
{
|
|
2
|
+
"// NOTE": "All overlapping fields MUST match personal_info.json exactly.",
|
|
3
|
+
"header": {
|
|
4
|
+
"name": "Alex Green",
|
|
5
|
+
"title": "Senior Software Engineer",
|
|
6
|
+
"email": "dummy_email",
|
|
7
|
+
"location": "Toronto, ON, Canada"
|
|
8
|
+
},
|
|
9
|
+
"summary": "Senior Software Engineer with 23+ years of experience in full-stack development, distributed systems, and cloud infrastructure. PhD in Computer Science from the University of Toronto. Currently leading a backend team at Pinecrest Technologies Inc., building enterprise data pipeline solutions. Previously built real-time transaction processing systems in FinTech. AWS and Kubernetes certified.",
|
|
10
|
+
"experience": [
|
|
11
|
+
{
|
|
12
|
+
"title": "Senior Software Engineer",
|
|
13
|
+
"company": "Pinecrest Technologies Inc.",
|
|
14
|
+
"location": "Toronto, ON",
|
|
15
|
+
"dates": "Mar 2019 – Present",
|
|
16
|
+
"bullets": [
|
|
17
|
+
"Lead backend team of 5 engineers building distributed data pipelines for enterprise SaaS platform",
|
|
18
|
+
"Design and implement RESTful APIs serving 2M+ daily requests with sub-100ms p99 latency",
|
|
19
|
+
"Mentor junior developers and conduct code reviews, improving team velocity by 30%"
|
|
20
|
+
]
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"title": "Software Engineer",
|
|
24
|
+
"company": "Crestridge Digital Corp.",
|
|
25
|
+
"location": "Toronto, ON",
|
|
26
|
+
"dates": "Jun 2012 - Feb 2019",
|
|
27
|
+
"bullets": [
|
|
28
|
+
"Developed real-time transaction processing systems handling $50M+ daily volume in FinTech",
|
|
29
|
+
"Built automated testing frameworks reducing QA cycle by 40%",
|
|
30
|
+
"Collaborated with product team on mobile banking features serving 500K+ users"
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"title": "Software Developer",
|
|
35
|
+
"company": "Cedarbrook Solutions Ltd.",
|
|
36
|
+
"location": "Toronto, ON",
|
|
37
|
+
"dates": "Sep 2002 - May 2012",
|
|
38
|
+
"bullets": [
|
|
39
|
+
"Full-stack web development for enterprise clients across multiple industries",
|
|
40
|
+
"Database administration and performance optimization for high-traffic applications",
|
|
41
|
+
"Part-time during graduate studies (2002-2010); full-time from 2010"
|
|
42
|
+
]
|
|
43
|
+
}
|
|
44
|
+
],
|
|
45
|
+
"education": [
|
|
46
|
+
{
|
|
47
|
+
"degree": "Ph.D. in Computer Science",
|
|
48
|
+
"institution": "University of Toronto",
|
|
49
|
+
"dates": "2004 - 2010",
|
|
50
|
+
"detail": "Dissertation: Scalable Real-Time Data Pipeline Architectures for High-Throughput Transaction Processing"
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"degree": "M.Sc. in Computer Science",
|
|
54
|
+
"institution": "University of Toronto",
|
|
55
|
+
"dates": "2002 - 2004",
|
|
56
|
+
"detail": "GPA: 3.8/4.0 | Thesis: Efficient Query Processing in Distributed Database Systems"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"degree": "B.Sc. in Computer Science",
|
|
60
|
+
"institution": "University of Toronto",
|
|
61
|
+
"dates": "1998 - 2002",
|
|
62
|
+
"detail": "GPA: 3.6/4.0 | Dean's List (2001, 2002)"
|
|
63
|
+
}
|
|
64
|
+
],
|
|
65
|
+
"skills": {
|
|
66
|
+
"languages": [
|
|
67
|
+
"Python",
|
|
68
|
+
"Java",
|
|
69
|
+
"TypeScript",
|
|
70
|
+
"Go"
|
|
71
|
+
],
|
|
72
|
+
"databases": [
|
|
73
|
+
"PostgreSQL",
|
|
74
|
+
"Redis"
|
|
75
|
+
],
|
|
76
|
+
"cloud_devops": [
|
|
77
|
+
"AWS",
|
|
78
|
+
"Docker",
|
|
79
|
+
"Kubernetes",
|
|
80
|
+
"Terraform",
|
|
81
|
+
"CI/CD"
|
|
82
|
+
],
|
|
83
|
+
"frameworks": [
|
|
84
|
+
"React",
|
|
85
|
+
"Node.js",
|
|
86
|
+
"GraphQL",
|
|
87
|
+
"REST API Design"
|
|
88
|
+
]
|
|
89
|
+
},
|
|
90
|
+
"certifications": [
|
|
91
|
+
"AWS Solutions Architect – Associate (2024)",
|
|
92
|
+
"Certified Kubernetes Administrator – CKA (2025)"
|
|
93
|
+
],
|
|
94
|
+
"languages": [
|
|
95
|
+
{
|
|
96
|
+
"language": "English",
|
|
97
|
+
"proficiency": "Native"
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"language": "French",
|
|
101
|
+
"proficiency": "Intermediate (B1)"
|
|
102
|
+
}
|
|
103
|
+
]
|
|
104
|
+
}
|