mini-swe-agent 1.17.1__tar.gz → 1.17.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {mini_swe_agent-1.17.1/src/mini_swe_agent.egg-info → mini_swe_agent-1.17.3}/PKG-INFO +8 -8
  2. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/README.md +7 -7
  3. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3/src/mini_swe_agent.egg-info}/PKG-INFO +8 -8
  4. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/__init__.py +1 -1
  5. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/agents/default.py +9 -1
  6. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/litellm_model.py +2 -1
  7. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/litellm_response_api_model.py +4 -2
  8. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/openrouter_model.py +2 -1
  9. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/portkey_model.py +2 -1
  10. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/portkey_response_api_model.py +1 -0
  11. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/requesty_model.py +2 -1
  12. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/extra/utils/batch_progress.py +2 -2
  13. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/LICENSE.md +0 -0
  14. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/pyproject.toml +0 -0
  15. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/setup.cfg +0 -0
  16. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/mini_swe_agent.egg-info/SOURCES.txt +0 -0
  17. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/mini_swe_agent.egg-info/dependency_links.txt +0 -0
  18. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/mini_swe_agent.egg-info/entry_points.txt +0 -0
  19. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/mini_swe_agent.egg-info/requires.txt +0 -0
  20. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/mini_swe_agent.egg-info/top_level.txt +0 -0
  21. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/__main__.py +0 -0
  22. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/agents/__init__.py +0 -0
  23. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/agents/interactive.py +0 -0
  24. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/agents/interactive_textual.py +0 -0
  25. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/README.md +0 -0
  26. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/__init__.py +0 -0
  27. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/default.yaml +0 -0
  28. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/extra/__init__.py +0 -0
  29. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/extra/swebench.yaml +0 -0
  30. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/extra/swebench_roulette.yaml +0 -0
  31. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/extra/swebench_xml.yaml +0 -0
  32. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/github_issue.yaml +0 -0
  33. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/mini.tcss +0 -0
  34. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/config/mini.yaml +0 -0
  35. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/environments/__init__.py +0 -0
  36. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/environments/docker.py +0 -0
  37. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/environments/extra/__init__.py +0 -0
  38. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/environments/extra/bubblewrap.py +0 -0
  39. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/environments/extra/swerex_docker.py +0 -0
  40. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/environments/local.py +0 -0
  41. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/environments/singularity.py +0 -0
  42. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/__init__.py +0 -0
  43. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/anthropic.py +0 -0
  44. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/extra/__init__.py +0 -0
  45. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/extra/roulette.py +0 -0
  46. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/test_models.py +0 -0
  47. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/utils/__init__.py +0 -0
  48. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/utils/cache_control.py +0 -0
  49. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/utils/key_per_thread.py +0 -0
  50. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/models/utils/openai_utils.py +0 -0
  51. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/py.typed +0 -0
  52. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/__init__.py +0 -0
  53. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/extra/__init__.py +0 -0
  54. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/extra/config.py +0 -0
  55. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/extra/swebench.py +0 -0
  56. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/extra/swebench_single.py +0 -0
  57. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/extra/utils/__init__.py +0 -0
  58. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/github_issue.py +0 -0
  59. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/hello_world.py +0 -0
  60. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/inspector.py +0 -0
  61. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/mini.py +0 -0
  62. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/mini_extra.py +0 -0
  63. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/utils/__init__.py +0 -0
  64. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/run/utils/save.py +0 -0
  65. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/utils/__init__.py +0 -0
  66. {mini_swe_agent-1.17.1 → mini_swe_agent-1.17.3}/src/minisweagent/utils/log.py +0 -0

PKG-INFO (+8 -8)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mini-swe-agent
-Version: 1.17.1
+Version: 1.17.3
 Summary: Nano SWE Agent - A simple AI software engineering agent
 Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
 License: MIT License
@@ -86,21 +86,21 @@ In 2024, [SWE-bench](https://github.com/swe-bench/SWE-bench) & [SWE-agent](https

 We now ask: **What if SWE-agent was 100x smaller, and still worked nearly as well?**

-`mini` is for
+The `mini` agent is for

 - **Researchers** who want to **[benchmark](https://swe-bench.com), [fine-tune](https://swesmith.com/) or RL** without assumptions, bloat, or surprises
-- **Developers** who like their tools like their scripts: **short, sharp, and readable**
+- **Developers** who like to **own, understand, and modify** their tools
 - **Engineers** who want something **trivial to sandbox & to deploy anywhere**

 Here's some details:

 - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
 [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
-- **Powerful:** Resolves >74% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
-- **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
+- **Performant:** Scores >74% on the [SWE-bench verified benchmark](https://www.swebench.com/) benchmark; starts faster than Claude Code
 - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
-- **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
 - **Cutting edge:** Built by the Princeton & Stanford team behind [SWE-bench](https://swebench.com) and [SWE-agent](https://swe-agent.com).
+- **Widely adopted:** In use by Meta, NVIDIA, Essential AI, Anyscale, and others
+- **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)

 <details>

@@ -108,7 +108,7 @@ Here's some details:

 [SWE-agent](https://swe-agent.com/latest/) jump-started the development of AI agents in 2024. Back then, we placed a lot of emphasis on tools and special interfaces for the agent.
 However, one year later, as LMs have become more capable, a lot of this is not needed at all to build a useful agent!
-In fact, mini-SWE-agent
+In fact, the `mini` agent

 - **Does not have any tools other than bash** — it doesn't even use the tool-calling interface of the LMs.
 This means that you can run it with literally any model. When running in sandboxed environments you also don't need to take care
@@ -131,7 +131,7 @@ You can see the result on the [SWE-bench (bash only)](https://www.swebench.com/)

 Some agents are overfitted research artifacts. Others are UI-heavy frontend monsters.

-`mini` wants to be a hackable tool, not a black box.
+The `mini` agent wants to be a hackable tool, not a black box.

 - **Simple** enough to understand at a glance
 - **Convenient** enough to use in daily workflows

README.md (+7 -7)
@@ -15,21 +15,21 @@ In 2024, [SWE-bench](https://github.com/swe-bench/SWE-bench) & [SWE-agent](https

 We now ask: **What if SWE-agent was 100x smaller, and still worked nearly as well?**

-`mini` is for
+The `mini` agent is for

 - **Researchers** who want to **[benchmark](https://swe-bench.com), [fine-tune](https://swesmith.com/) or RL** without assumptions, bloat, or surprises
-- **Developers** who like their tools like their scripts: **short, sharp, and readable**
+- **Developers** who like to **own, understand, and modify** their tools
 - **Engineers** who want something **trivial to sandbox & to deploy anywhere**

 Here's some details:

 - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
 [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
-- **Powerful:** Resolves >74% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
-- **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
+- **Performant:** Scores >74% on the [SWE-bench verified benchmark](https://www.swebench.com/) benchmark; starts faster than Claude Code
 - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
-- **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
 - **Cutting edge:** Built by the Princeton & Stanford team behind [SWE-bench](https://swebench.com) and [SWE-agent](https://swe-agent.com).
+- **Widely adopted:** In use by Meta, NVIDIA, Essential AI, Anyscale, and others
+- **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)

 <details>

@@ -37,7 +37,7 @@ Here's some details:

 [SWE-agent](https://swe-agent.com/latest/) jump-started the development of AI agents in 2024. Back then, we placed a lot of emphasis on tools and special interfaces for the agent.
 However, one year later, as LMs have become more capable, a lot of this is not needed at all to build a useful agent!
-In fact, mini-SWE-agent
+In fact, the `mini` agent

 - **Does not have any tools other than bash** — it doesn't even use the tool-calling interface of the LMs.
 This means that you can run it with literally any model. When running in sandboxed environments you also don't need to take care
@@ -60,7 +60,7 @@ You can see the result on the [SWE-bench (bash only)](https://www.swebench.com/)

 Some agents are overfitted research artifacts. Others are UI-heavy frontend monsters.

-`mini` wants to be a hackable tool, not a black box.
+The `mini` agent wants to be a hackable tool, not a black box.

 - **Simple** enough to understand at a glance
 - **Convenient** enough to use in daily workflows

src/mini_swe_agent.egg-info/PKG-INFO (+8 -8)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mini-swe-agent
-Version: 1.17.1
+Version: 1.17.3
 Summary: Nano SWE Agent - A simple AI software engineering agent
 Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
 License: MIT License
@@ -86,21 +86,21 @@ In 2024, [SWE-bench](https://github.com/swe-bench/SWE-bench) & [SWE-agent](https

 We now ask: **What if SWE-agent was 100x smaller, and still worked nearly as well?**

-`mini` is for
+The `mini` agent is for

 - **Researchers** who want to **[benchmark](https://swe-bench.com), [fine-tune](https://swesmith.com/) or RL** without assumptions, bloat, or surprises
-- **Developers** who like their tools like their scripts: **short, sharp, and readable**
+- **Developers** who like to **own, understand, and modify** their tools
 - **Engineers** who want something **trivial to sandbox & to deploy anywhere**

 Here's some details:

 - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
 [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
-- **Powerful:** Resolves >74% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
-- **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
+- **Performant:** Scores >74% on the [SWE-bench verified benchmark](https://www.swebench.com/) benchmark; starts faster than Claude Code
 - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
-- **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
 - **Cutting edge:** Built by the Princeton & Stanford team behind [SWE-bench](https://swebench.com) and [SWE-agent](https://swe-agent.com).
+- **Widely adopted:** In use by Meta, NVIDIA, Essential AI, Anyscale, and others
+- **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)

 <details>

@@ -108,7 +108,7 @@ Here's some details:

 [SWE-agent](https://swe-agent.com/latest/) jump-started the development of AI agents in 2024. Back then, we placed a lot of emphasis on tools and special interfaces for the agent.
 However, one year later, as LMs have become more capable, a lot of this is not needed at all to build a useful agent!
-In fact, mini-SWE-agent
+In fact, the `mini` agent

 - **Does not have any tools other than bash** — it doesn't even use the tool-calling interface of the LMs.
 This means that you can run it with literally any model. When running in sandboxed environments you also don't need to take care
@@ -131,7 +131,7 @@ You can see the result on the [SWE-bench (bash only)](https://www.swebench.com/)

 Some agents are overfitted research artifacts. Others are UI-heavy frontend monsters.

-`mini` wants to be a hackable tool, not a black box.
+The `mini` agent wants to be a hackable tool, not a black box.

 - **Simple** enough to understand at a glance
 - **Convenient** enough to use in daily workflows

src/minisweagent/__init__.py (+1 -1)
@@ -8,7 +8,7 @@ This file provides:
 unless you want the static type checking.
 """

-__version__ = "1.17.1"
+__version__ = "1.17.3"

 import os
 from pathlib import Path

src/minisweagent/agents/default.py (+9 -1)
@@ -20,7 +20,15 @@ class AgentConfig:
 )
 timeout_template: str = (
 "The last command <command>{{action['action']}}</command> timed out and has been killed.\n"
-"The output of the command was:\n <output>\n{{output}}\n</output>\n"
+"The output of the command was:\n"
+"{% if output | length < 10000 -%}\n"
+"<output>\n{{output}}\n</output>\n"
+"{%- else -%}\n"
+"<warning>Output was too long and has been truncated.</warning>\n"
+"<output_head>\n{{ output[:5000] }}\n</output_head>\n"
+"<elided_chars>{{ output | length - 10000 }} characters elided</elided_chars>\n"
+"<output_tail>\n{{ output[-5000:] }}\n</output_tail>\n"
+"{%- endif %}\n"
 "Please try another command and make sure to avoid those requiring interactive input."
 )
 format_error_template: str = "Please always provide EXACTLY ONE action in triple backticks."
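
The template change above (quoted verbatim from the diff) only alters how a timed-out command's output is reported: anything under 10,000 characters is passed through, while longer output keeps its first and last 5,000 characters. A minimal sketch of that behaviour, assuming the string is rendered as a plain Jinja2 template; the sample command and output below are invented for illustration:

# Sketch only, not the agent's code: render the new timeout_template from the
# diff above with Jinja2 and a deliberately long fake output.
from jinja2 import Template

timeout_template = (
    "The last command <command>{{action['action']}}</command> timed out and has been killed.\n"
    "The output of the command was:\n"
    "{% if output | length < 10000 -%}\n"
    "<output>\n{{output}}\n</output>\n"
    "{%- else -%}\n"
    "<warning>Output was too long and has been truncated.</warning>\n"
    "<output_head>\n{{ output[:5000] }}\n</output_head>\n"
    "<elided_chars>{{ output | length - 10000 }} characters elided</elided_chars>\n"
    "<output_tail>\n{{ output[-5000:] }}\n</output_tail>\n"
    "{%- endif %}\n"
    "Please try another command and make sure to avoid those requiring interactive input."
)

fake_output = "x" * 25_000  # >= 10,000 characters, so the truncated branch is taken
message = Template(timeout_template).render(action={"action": "cat huge.log"}, output=fake_output)
# The rendered message keeps the first and last 5,000 characters and reports
# "15000 characters elided" instead of echoing all 25,000 characters to the model.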

src/minisweagent/models/litellm_model.py (+2 -1)
@@ -41,6 +41,7 @@ class LitellmModel:
 litellm.utils.register_model(json.loads(Path(self.config.litellm_model_registry).read_text()))

 @retry(
+reraise=True,
 stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
 wait=wait_exponential(multiplier=1, min=4, max=60),
 before_sleep=before_sleep_log(logger, logging.WARNING),
@@ -68,7 +69,7 @@ class LitellmModel:
 def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
 if self.config.set_cache_control:
 messages = set_cache_control(messages, mode=self.config.set_cache_control)
-response = self._query(messages, **kwargs)
+response = self._query([{"role": msg["role"], "content": msg["content"]} for msg in messages], **kwargs)
 try:
 cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
 if cost <= 0.0:
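
Two things change here: messages are reduced to their role and content keys before the API call, and tenacity's retry decorator gains reraise=True, so once the retry budget is exhausted the caller sees the provider's original exception rather than a tenacity.RetryError wrapper. A standalone sketch of the reraise difference (illustrative only; the always-failing functions are invented):

# Sketch of tenacity's reraise flag; not the agent's code.
from tenacity import RetryError, retry, stop_after_attempt

@retry(stop=stop_after_attempt(3))  # default: the final failure is wrapped in RetryError
def fails_wrapped():
    raise ValueError("upstream API error")

@retry(reraise=True, stop=stop_after_attempt(3))  # reraise=True: the original error surfaces
def fails_reraised():
    raise ValueError("upstream API error")

try:
    fails_wrapped()
except RetryError as err:
    print(type(err.last_attempt.exception()))  # <class 'ValueError'>, hidden inside the wrapper

try:
    fails_reraised()
except ValueError as err:
    print(err)  # "upstream API error" — callers can catch the real exception class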

src/minisweagent/models/litellm_response_api_model.py (+4 -2)
@@ -28,6 +28,7 @@ class LitellmResponseAPIModel(LitellmModel):
 self._previous_response_id: str | None = None

 @retry(
+reraise=True,
 stop=stop_after_attempt(10),
 wait=wait_exponential(multiplier=1, min=4, max=60),
 before_sleep=before_sleep_log(logger, logging.WARNING),
@@ -45,9 +46,11 @@ class LitellmResponseAPIModel(LitellmModel):
 )
 def _query(self, messages: list[dict[str, str]], **kwargs):
 try:
+# Remove 'timestamp' field added by agent - not supported by OpenAI responses API
+clean_messages = [{"role": msg["role"], "content": msg["content"]} for msg in messages]
 resp = litellm.responses(
 model=self.config.model_name,
-input=messages if self._previous_response_id is None else messages[-1:],
+input=clean_messages if self._previous_response_id is None else clean_messages[-1:],
 previous_response_id=self._previous_response_id,
 **(self.config.model_kwargs | kwargs),
 )
@@ -59,7 +62,6 @@ class LitellmResponseAPIModel(LitellmModel):

 def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
 response = self._query(messages, **kwargs)
-print(response)
 text = coerce_responses_text(response)
 try:
 cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
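
The same role/content filtering appears in this backend, together with a comment explaining why: the agent attaches a bookkeeping "timestamp" field to each message, which the OpenAI responses API does not accept. A minimal sketch of the pattern with invented message data (the helper name is made up):

# Sketch of the message-sanitizing pattern used in the diffs above; not library code.
def strip_to_role_and_content(messages: list[dict]) -> list[dict]:
    # Forward only the keys a chat-style API expects; drop extras such as "timestamp".
    return [{"role": m["role"], "content": m["content"]} for m in messages]

history = [
    {"role": "user", "content": "Fix the failing test.", "timestamp": 1700000000.0},
    {"role": "assistant", "content": "Running pytest -x now.", "timestamp": 1700000005.0},
]
print(strip_to_role_and_content(history))
# [{'role': 'user', 'content': 'Fix the failing test.'},
#  {'role': 'assistant', 'content': 'Running pytest -x now.'}]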

src/minisweagent/models/openrouter_model.py (+2 -1)
@@ -56,6 +56,7 @@ class OpenRouterModel:
 self._api_key = os.getenv("OPENROUTER_API_KEY", "")

 @retry(
+reraise=True,
 stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
 wait=wait_exponential(multiplier=1, min=4, max=60),
 before_sleep=before_sleep_log(logger, logging.WARNING),
@@ -97,7 +98,7 @@ class OpenRouterModel:
 def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
 if self.config.set_cache_control:
 messages = set_cache_control(messages, mode=self.config.set_cache_control)
-response = self._query(messages, **kwargs)
+response = self._query([{"role": msg["role"], "content": msg["content"]} for msg in messages], **kwargs)

 usage = response.get("usage", {})
 cost = usage.get("cost", 0.0)

src/minisweagent/models/portkey_model.py (+2 -1)
@@ -74,6 +74,7 @@ class PortkeyModel:
 self.client = Portkey(**client_kwargs)

 @retry(
+reraise=True,
 stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
 wait=wait_exponential(multiplier=1, min=4, max=60),
 before_sleep=before_sleep_log(logger, logging.WARNING),
@@ -90,7 +91,7 @@ class PortkeyModel:
 def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
 if self.config.set_cache_control:
 messages = set_cache_control(messages, mode=self.config.set_cache_control)
-response = self._query(messages, **kwargs)
+response = self._query([{"role": msg["role"], "content": msg["content"]} for msg in messages], **kwargs)
 cost = self._calculate_cost(response)
 self.n_calls += 1
 self.cost += cost

src/minisweagent/models/portkey_response_api_model.py (+1 -0)
@@ -30,6 +30,7 @@ class PortkeyResponseAPIModel(PortkeyModel):
 self._previous_response_id: str | None = None

 @retry(
+reraise=True,
 stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
 wait=wait_exponential(multiplier=1, min=4, max=60),
 before_sleep=before_sleep_log(logger, logging.WARNING),

src/minisweagent/models/requesty_model.py (+2 -1)
@@ -51,6 +51,7 @@ class RequestyModel:
 self._api_key = os.getenv("REQUESTY_API_KEY", "")

 @retry(
+reraise=True,
 stop=stop_after_attempt(10),
 wait=wait_exponential(multiplier=1, min=4, max=60),
 before_sleep=before_sleep_log(logger, logging.WARNING),
@@ -91,7 +92,7 @@ class RequestyModel:
 raise RequestyAPIError(f"Request failed: {e}") from e

 def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
-response = self._query(messages, **kwargs)
+response = self._query([{"role": msg["role"], "content": msg["content"]} for msg in messages], **kwargs)

 # Extract cost from usage information
 usage = response.get("usage", {})

src/minisweagent/run/extra/utils/batch_progress.py (+2 -2)
@@ -79,7 +79,7 @@ class RunBatchProgressManager:
 "[cyan]Overall Progress", total=num_instances, total_cost="0.00", eta=""
 )

-self.render_group = Group(Table(), self._task_progress_bar, self._main_progress_bar)
+self.render_group = Group(self._main_progress_bar, Table(), self._task_progress_bar)
 self._yaml_report_path = yaml_report_path

 @property
@@ -112,7 +112,7 @@ class RunBatchProgressManager:
 instances_str = _shorten_str(", ".join(reversed(instances)), 55)
 t.add_row(status, str(len(instances)), instances_str)
 assert self.render_group is not None
-self.render_group.renderables[0] = t
+self.render_group.renderables[1] = t

 def _update_total_costs(self) -> None:
 with self._lock:
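
The batch-progress change only reorders the Rich renderables so the overall progress bar is drawn first; because the placeholder status table now sits at index 1, later refreshes assign to renderables[1] instead of renderables[0]. A self-contained sketch of that layout (the bar and table contents are invented):

# Sketch of the reordered Rich render group; not the agent's code.
from rich.console import Console, Group
from rich.progress import Progress
from rich.table import Table

main_progress_bar = Progress()  # overall progress, now rendered at the top
task_progress_bar = Progress()  # per-instance progress
render_group = Group(main_progress_bar, Table(), task_progress_bar)

# The placeholder Table() lives at index 1, so a status refresh replaces
# renderables[1] rather than renderables[0]:
status_table = Table("Status", "Count", "Instances")
status_table.add_row("Running", "2", "instance-a, instance-b")
render_group.renderables[1] = status_table

Console().print(render_group)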