hud-python 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (58) hide show
  1. hud/__init__.py +4 -3
  2. hud/adapters/claude/adapter.py +5 -14
  3. hud/adapters/common/adapter.py +3 -3
  4. hud/adapters/common/tests/__init__.py +0 -0
  5. hud/adapters/common/tests/test_adapter.py +277 -0
  6. hud/adapters/common/types.py +3 -3
  7. hud/adapters/operator/adapter.py +16 -23
  8. hud/agent/__init__.py +8 -1
  9. hud/agent/base.py +28 -28
  10. hud/agent/claude.py +69 -60
  11. hud/agent/langchain.py +32 -26
  12. hud/agent/operator.py +75 -67
  13. hud/env/__init__.py +5 -5
  14. hud/env/client.py +2 -2
  15. hud/env/docker_client.py +37 -39
  16. hud/env/environment.py +91 -66
  17. hud/env/local_docker_client.py +5 -7
  18. hud/env/remote_client.py +39 -32
  19. hud/env/remote_docker_client.py +13 -3
  20. hud/evaluators/__init__.py +2 -3
  21. hud/evaluators/base.py +4 -3
  22. hud/evaluators/inspect.py +3 -8
  23. hud/evaluators/judge.py +34 -58
  24. hud/evaluators/match.py +42 -49
  25. hud/evaluators/remote.py +13 -26
  26. hud/evaluators/tests/__init__.py +0 -0
  27. hud/evaluators/tests/test_inspect.py +12 -0
  28. hud/evaluators/tests/test_judge.py +231 -0
  29. hud/evaluators/tests/test_match.py +115 -0
  30. hud/evaluators/tests/test_remote.py +98 -0
  31. hud/exceptions.py +167 -0
  32. hud/gym.py +9 -7
  33. hud/job.py +179 -109
  34. hud/server/__init__.py +2 -2
  35. hud/server/requests.py +148 -186
  36. hud/server/tests/__init__.py +0 -0
  37. hud/server/tests/test_requests.py +275 -0
  38. hud/settings.py +3 -2
  39. hud/task.py +9 -19
  40. hud/taskset.py +44 -11
  41. hud/trajectory.py +6 -9
  42. hud/types.py +12 -9
  43. hud/utils/__init__.py +2 -2
  44. hud/utils/common.py +36 -15
  45. hud/utils/config.py +45 -30
  46. hud/utils/progress.py +34 -21
  47. hud/utils/telemetry.py +10 -11
  48. hud/utils/tests/__init__.py +0 -0
  49. hud/utils/tests/test_common.py +52 -0
  50. hud/utils/tests/test_config.py +129 -0
  51. hud/utils/tests/test_progress.py +225 -0
  52. hud/utils/tests/test_telemetry.py +37 -0
  53. hud/utils/tests/test_version.py +8 -0
  54. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/METADATA +9 -6
  55. hud_python-0.2.4.dist-info/RECORD +62 -0
  56. hud_python-0.2.2.dist-info/RECORD +0 -46
  57. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/WHEEL +0 -0
  58. {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: SDK for the HUD evaluation platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-sdk
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-sdk/issues
@@ -59,8 +59,11 @@ Requires-Dist: jupyter-client; extra == 'dev'
59
59
  Requires-Dist: jupyter-core; extra == 'dev'
60
60
  Requires-Dist: openai; extra == 'dev'
61
61
  Requires-Dist: pyright==1.1.364; extra == 'dev'
62
+ Requires-Dist: pytest-asyncio; extra == 'dev'
63
+ Requires-Dist: pytest-cov; extra == 'dev'
64
+ Requires-Dist: pytest-mock; extra == 'dev'
62
65
  Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
63
- Requires-Dist: ruff==0.9.8; extra == 'dev'
66
+ Requires-Dist: ruff==0.11.8; extra == 'dev'
64
67
  Description-Content-Type: text/markdown
65
68
 
66
69
  # HUD
@@ -94,17 +97,17 @@ pip install hud-python
94
97
 
95
98
  ### Simple Browser Example with Claude Computer Use
96
99
 
97
- > This example uses the `@job("test-run")` decorator, so the results of this run will appear under the job named "test-run" on your [HUD Jobs page](https://app.hud.so/jobs).
100
+ > This example uses the `@register_job("test-run")` decorator, so the results of this run will appear under the job named "test-run" on your [HUD Jobs page](https://app.hud.so/jobs).
98
101
 
99
102
  Make sure you have defined your `ANTHROPIC_API_KEY` in environment variables to run Claude.
100
103
 
101
104
  ```python
102
105
  import asyncio
103
- from hud import gym, job
106
+ from hud import gym, register_job
104
107
  from hud.task import Task
105
108
  from hud.agent import ClaudeAgent
106
109
 
107
- @job("test-run")
110
+ @register_job("test-run")
108
111
  async def main():
109
112
  task = Task(
110
113
  prompt="Insert the text 'capybara' into the search bar",
@@ -192,7 +195,7 @@ If you use this SDK in your research, please cite it as follows:
192
195
 
193
196
  ```bibtex
194
197
  @software{hud2025agentevalplatform,
195
- author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Max Muoto and Oskars Putans and Govind Pimpale and Mayank Singamreddy and Nguyen Nhat Minh},
198
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Oskars Putans and Govind Pimpale and Mayank Singamreddy and Nguyen Nhat Minh},
196
199
  title = {{HUD: An Evaluation Platform for Agents}},
197
200
  date = {2025-04},
198
201
  url = {https://github.com/hud-evals/hud-sdk},
@@ -0,0 +1,62 @@
1
+ hud/__init__.py,sha256=cEmr9zVXS5upV_qcVePvApSjhPj0CmVjXolOaY3Ojuw,529
2
+ hud/exceptions.py,sha256=DNfaMmFGGAZsLwSVl8HILyQE5Eg5ygPLhBa7HLYcxU8,5167
3
+ hud/gym.py,sha256=hM1oH9DFFmxHLO9lIU4O-zkvRDCPZ5v-4BnfzNSE75E,3603
4
+ hud/job.py,sha256=RxXaFJQvcYbPCMIsuQRdrEyi_q3WMgdcNXypLmZqlbU,23351
5
+ hud/settings.py,sha256=1lOg2MieyBlmGmXiaiMk0mQvMj4N1-jcNeHCFLK5ZJA,1339
6
+ hud/task.py,sha256=2E4_BTqbfkertrPDkBM4M1CPYQ8Db-Iulv22f6dInWg,5112
7
+ hud/taskset.py,sha256=MgY9PyxKt4vO8775Qory04NA4o-BYTjirxBntslGPmk,3253
8
+ hud/trajectory.py,sha256=OrcRbxK_ejFp1VhJCjZnM1WCmCXxEOK4CxNjCngcsjo,3721
9
+ hud/types.py,sha256=_6eqLCOyxceMX_Oo8YwJQARHLb0N9zsEtc8ZCRJbf9I,2473
10
+ hud/adapters/__init__.py,sha256=zz24KdC_e9TJPgWo6y57_8SzevEE5ak4Cm6tXzMxwRk,266
11
+ hud/adapters/claude/__init__.py,sha256=i7QEF-29FLb9qxp1eYtXs-adIk_tG54tL-9g6d3xodk,100
12
+ hud/adapters/claude/adapter.py,sha256=xN8IHQH-xrIXxfWxoPV0VBHFvmsJZwq8PC4Ri_FU1Ew,5867
13
+ hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0mp8,118
14
+ hud/adapters/common/adapter.py,sha256=fxdz1S6uwtHczLoDzmbuT0icQ49ywGMTiuVv8oJL4g4,5822
15
+ hud/adapters/common/types.py,sha256=k7Ic9YhgSWg3zZAtrFaOXINg06qbD1pYNzKs1obMFQE,5018
16
+ hud/adapters/common/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ hud/adapters/common/tests/test_adapter.py,sha256=rTD36LjvytHqMIyOLDyrn0RLIkd20s6f6dwoBEarJaw,8744
18
+ hud/adapters/operator/__init__.py,sha256=31vTRs268_TOLd-TeQRKau5bDYy78wxCNpJFhD5_l8U,104
19
+ hud/adapters/operator/adapter.py,sha256=P07FsvpS11BN4IoWNJUwUPkTW6QmJoboWhxCO7exCWs,2989
20
+ hud/agent/__init__.py,sha256=kolJUkXlqbpULqvV4ZHaeY7ABaDKvyOOcnB5aqGtIgw,321
21
+ hud/agent/base.py,sha256=AfysqGBhmOt1a24AXxrpq1TVI5UHxxv1oJbUd1-hKKo,3696
22
+ hud/agent/claude.py,sha256=Gz5ydfdQZFPM1e1D0qfprkNzJooAXzlXYXnrug0AIFk,7428
23
+ hud/agent/langchain.py,sha256=giAmw2w84nugToKm-5FMlpJSB7fJh4pa_e7hERTX1Qs,8720
24
+ hud/agent/operator.py,sha256=DPDN8oAXjiS44brljxYsYYIQtZdodgjYTDUeDqAiFFo,8156
25
+ hud/env/__init__.py,sha256=wVEesXMXM5hcNXQHt0-PN4-9RnE69DEnQENS7uJSv_Y,266
26
+ hud/env/client.py,sha256=brhfLkWGSuvxl3vqGMCQT-vXfj8rUbJMhE3zJg9WMDA,869
27
+ hud/env/docker_client.py,sha256=9ltQyz30u_iitbMK2kfzs1yhs9aeC4V65Y7A1lXl23A,10200
28
+ hud/env/environment.py,sha256=HWnrUUwjgqOLm7xRqH0MKeGw-3mZa_ARCEnolG6xvpY,14800
29
+ hud/env/local_docker_client.py,sha256=OqBnlQEV4t1TlhaU-maL3noBB8YNWWQnj-iWXPyrWYc,7751
30
+ hud/env/remote_client.py,sha256=080Yi2na4t5bXMgtRaRQNCmuY7dK898FyjQG6vv-0WE,6156
31
+ hud/env/remote_docker_client.py,sha256=5akBq7g-8I0DL0nsQHW9sUAbau6PyNyDNb77ygc_-r4,7183
32
+ hud/evaluators/__init__.py,sha256=V5nktEAw3EDn2Y537pjia5Y1IjdLBIPrDjTs6YTCdX4,153
33
+ hud/evaluators/base.py,sha256=ALO9Rj-R_9HtHIHYp84bsQQD12De0XnCTwad78_T5-k,771
34
+ hud/evaluators/inspect.py,sha256=ZvrTXLpgibyvQ5aNXAMP4quyXISrRQHg9besDcuCx7U,692
35
+ hud/evaluators/judge.py,sha256=N3gEQGwVin9Ir80wWw6VtaL0xrlzitbmItaLm0he5gY,5962
36
+ hud/evaluators/match.py,sha256=8YVQD942myX72Jkme2JFIVlmKhFXEa3CgGTjLC8O5n4,4701
37
+ hud/evaluators/remote.py,sha256=kmD_XIU20KvX0NKgaEEKTTKHp0KVRa_3jUEgONh2nkY,2054
38
+ hud/evaluators/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ hud/evaluators/tests/test_inspect.py,sha256=8dMjgQfXOJGcS8gP6TzoBbQiG_NYuRL6IobMG7euJdU,376
40
+ hud/evaluators/tests/test_judge.py,sha256=c1GaAeq_WpBVgBlx-gQncHrOPokzKNxlbgiC8W8hxYI,7829
41
+ hud/evaluators/tests/test_match.py,sha256=C04GoluyT9i41YZ65xEjN7tKHQbENbrpNhNtUd4ivmA,3919
42
+ hud/evaluators/tests/test_remote.py,sha256=YdJpyyuRLkYP0e3jTUkD3zobS2WHQPePn8yBZtYOIN4,3243
43
+ hud/server/__init__.py,sha256=IPxPCqtPLguryN-nBq78Sakypw2bRiE2iHv3SXG8YRk,139
44
+ hud/server/requests.py,sha256=U0WEeY8AzsLMSICebxQyF0FEbXpKieh-3Qnisd0C6a4,7881
45
+ hud/server/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ hud/server/tests/test_requests.py,sha256=63YCbykcib5MxKxm-OgHJPLX3QC7hmgIwnWaYukVM6s,9077
47
+ hud/utils/__init__.py,sha256=oSl_gGoS272X2VFnBYX8hLxcP2xgGoBYQXAuLhtQgw8,260
48
+ hud/utils/common.py,sha256=nAhGgcOo132p5Qg48u6WV_NVN-oiag-4To--C1SYSDU,3052
49
+ hud/utils/config.py,sha256=kGVuR7oKXjmJfeUAnbt53GesCJf_HPvvV1R6s9xjvq4,3549
50
+ hud/utils/progress.py,sha256=suikwFM8sdSfkV10nAOEaInDhG4XKgOSvFePg4jSj1A,5927
51
+ hud/utils/telemetry.py,sha256=hrVIx2rUjSGyy9IVxTZ_3Jii83PiHjyFRd5ls2whimM,1863
52
+ hud/utils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
+ hud/utils/tests/test_common.py,sha256=gbYpQKBNdbCcEH0v1UZpxLt_NW2T5sETMIJKvy8S8pY,1658
54
+ hud/utils/tests/test_config.py,sha256=dPlXYWuMrxX-NOYbf0vdJ27TJpfacKG8eiKOSGOcfDU,4079
55
+ hud/utils/tests/test_progress.py,sha256=QunwDgi_heQXhDgmC25zgjr-sFUu5FdJ_1aYigMKeIc,6351
56
+ hud/utils/tests/test_telemetry.py,sha256=t0An1RTBaE0dZVEpF4uwuq5k1R-PXFR5k4u71h60tx8,1224
57
+ hud/utils/tests/test_version.py,sha256=bzqe0zpgWpVjnYJR_bfGi-eikKPlSkuv959T1hKBTT8,159
58
+ hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
+ hud_python-0.2.4.dist-info/METADATA,sha256=tXUQsMPRvY2wG_2YpAAcmJpNOv9NHEOOtC4Rxgh_3SI,8108
60
+ hud_python-0.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
61
+ hud_python-0.2.4.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
62
+ hud_python-0.2.4.dist-info/RECORD,,
@@ -1,46 +0,0 @@
1
- hud/__init__.py,sha256=XJXuALIb-pRnnVdfEkjpuiLtS77WD3Idv5VOLECY3eo,488
2
- hud/gym.py,sha256=ErNwJgCJVhWZHzMILfzVXX0Dawh5Cy0nIQWWh7fsKW4,3641
3
- hud/job.py,sha256=IvW2sBFoQpExXVi2FL3cEwnrxVIGp8RBfVj2s8edn20,22387
4
- hud/settings.py,sha256=rv8TiZx4wmBzIoEEkOzoywC0nt8UZXlHxIa_LW4tWAg,1346
5
- hud/task.py,sha256=kuP69hIxV0ZsHRsZ1XEq6lzYnUSD3b6ywWzloCGW5DU,5380
6
- hud/taskset.py,sha256=xDPBXeDm4AlSOwl-MM98lN0x6PmGV8t9jv7sNyS_u0c,2426
7
- hud/trajectory.py,sha256=PA-sE2iyt2BctO2Dex-2ZaRmS95AkEXTicZjHCVCYqE,3749
8
- hud/types.py,sha256=D_OGPutR55PlWrUDqehYLlR-FqQp9GyKlxJhNmCRyFE,2485
9
- hud/adapters/__init__.py,sha256=zz24KdC_e9TJPgWo6y57_8SzevEE5ak4Cm6tXzMxwRk,266
10
- hud/adapters/claude/__init__.py,sha256=i7QEF-29FLb9qxp1eYtXs-adIk_tG54tL-9g6d3xodk,100
11
- hud/adapters/claude/adapter.py,sha256=viZDCNjM6aCCfpxt3PIxfVOz3rrlOgZli5WyHUxEGjc,6079
12
- hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0mp8,118
13
- hud/adapters/common/adapter.py,sha256=ls-gXtg1N_SQc211rkDb3LL511HNZv6etm1nx2ZtrkQ,5808
14
- hud/adapters/common/types.py,sha256=9RWLZp6sViu9uPSU5K8-TRaQkdirunxZfDsPIxAR_TM,4995
15
- hud/adapters/operator/__init__.py,sha256=31vTRs268_TOLd-TeQRKau5bDYy78wxCNpJFhD5_l8U,104
16
- hud/adapters/operator/adapter.py,sha256=NNbNYPqSquIh4KHCk9aN7dARe7yPUx0J2kDIk-N015s,3309
17
- hud/agent/__init__.py,sha256=qdCWY6wthkTpyq7SWT1JYAYu1eXk4LfdSAcAfKt0Ohs,294
18
- hud/agent/base.py,sha256=RThJ_h4A3oU23zyvvKtxY2a_YM03Vd1XYDXdY3bAf8g,3881
19
- hud/agent/claude.py,sha256=tbDKAzGCLJPnUnHc8eV-zZmj3ZG6QQx0ukWKoO4Ekec,7445
20
- hud/agent/langchain.py,sha256=9ow74ENcJmZ_muzoMdG2tz5VhvAHm2zKiemphHZm-Pg,8683
21
- hud/agent/operator.py,sha256=44t19TzcCrS1N3-rnD25ZLXx5s4Io8On27LomALuugs,8185
22
- hud/env/__init__.py,sha256=BHFY_N0kEI142pjWtMyqUb3BGnoiekY8evRCIbSbO2w,271
23
- hud/env/client.py,sha256=SPR6ct6NFxmIrgIi3K8tEC-vnqOmCbCBtuT81PaVjuY,869
24
- hud/env/docker_client.py,sha256=56_u3Ri4NulGcBumAg-7-KilmFmBKthOwEIM5bOLOZc,10418
25
- hud/env/environment.py,sha256=Xyq4KQO9aWYPwZ0uESAetB5EEZgmlEnZVc7sA0DLz2c,13706
26
- hud/env/local_docker_client.py,sha256=TCD9z1qjafxjwAWLatAL8d587_ioMDHjs8T5cBgusr8,7789
27
- hud/env/remote_client.py,sha256=XDKmr5ImLBMZn-ToPrXnc4iBNRwDwzPtQIXEcgShbhE,5977
28
- hud/env/remote_docker_client.py,sha256=FwaO7NyygDt9oe3pDD7PwUS21pxzc465mwcXk-Cx-60,6838
29
- hud/evaluators/__init__.py,sha256=XophB666xPnurhQ_ygfW44h0Jh0BQGCgUzCXEOG2Q1g,158
30
- hud/evaluators/base.py,sha256=CNbrvFWQfl1YuBxJKzuG4_TBAdAf0TOQA3hl7eGsbaA,782
31
- hud/evaluators/inspect.py,sha256=eTu9E2eBFe4jd4sPtXL0_vGIEY8aiEmTgmN__v77jvI,735
32
- hud/evaluators/judge.py,sha256=0T9DHFRR38oH2X1d87t58SBSAhbKWRH5PlljzCa3rkg,6449
33
- hud/evaluators/match.py,sha256=iFJ_qqaHJQ19TICdQPjHgTubBhq7izCIET5qU_104Fk,4719
34
- hud/evaluators/remote.py,sha256=NVUJJvrpGQj2eL-aFxzTEnAWW7iuSI9eDWtar54dc6E,2174
35
- hud/server/__init__.py,sha256=cxDKTwMdGzhj7bYajtejN8XCt7K8Xq3eKB2No0qBpoY,169
36
- hud/server/requests.py,sha256=s8LZZYWT1wl7lPu2vwRaYPZs9_gjKwSg3LZLvS5-s6E,9085
37
- hud/utils/__init__.py,sha256=LnoI2tQUnd-mQ4eg-gpJJgmHBBIhggJ6c9ap7MBgrfs,260
38
- hud/utils/common.py,sha256=xJWBF2KTAQKYMGFq5hJWcwpcHAEYY3so4ZqvZYf1BjU,2778
39
- hud/utils/config.py,sha256=Evu2nUCYaujpWXXwLprsgr_KFUkWuSdkibmLRJ_iq64,3281
40
- hud/utils/progress.py,sha256=gP7_NXG0m_bhNaYPwrwUOeNumwjx4ewjXP7v-_0Lsj0,5684
41
- hud/utils/telemetry.py,sha256=md7AuKxtDqsONMeeTOHen1XpmNds8CbXROX_PnkDxFc,1993
42
- hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
- hud_python-0.2.2.dist-info/METADATA,sha256=I26pZPqv8O5r36BNehTmJuYQjtbRu-C3bCjt37Iwync,7963
44
- hud_python-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
45
- hud_python-0.2.2.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
46
- hud_python-0.2.2.dist-info/RECORD,,