llm-codegen-research 2.12__tar.gz → 2.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/PKG-INFO +1 -1
  2. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/__init__.py +4 -0
  3. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/__init__.py +4 -0
  4. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/__init__.py +31 -0
  5. llm_codegen_research-2.14/src/llm_cgr/llm/clients/openai_tool.py +328 -0
  6. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_codegen_research.egg-info/PKG-INFO +1 -1
  7. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_codegen_research.egg-info/SOURCES.txt +2 -0
  8. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/tests/test_llm_local.py +26 -0
  9. llm_codegen_research-2.14/tests/test_llm_tool.py +96 -0
  10. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/LICENSE +0 -0
  11. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/README.md +0 -0
  12. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/pyproject.toml +0 -0
  13. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/setup.cfg +0 -0
  14. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/analyse/__init__.py +0 -0
  15. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/analyse/classes.py +0 -0
  16. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/analyse/languages/__init__.py +0 -0
  17. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/analyse/languages/code_data.py +0 -0
  18. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/analyse/languages/javascript.py +0 -0
  19. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/analyse/languages/python.py +0 -0
  20. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/analyse/languages/rust.py +0 -0
  21. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/analyse/regexes.py +0 -0
  22. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/decorators.py +0 -0
  23. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/defaults.py +0 -0
  24. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/enums.py +0 -0
  25. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/json_utils.py +0 -0
  26. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/anthropic.py +0 -0
  27. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/base.py +0 -0
  28. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/deepseek.py +0 -0
  29. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/mistral.py +0 -0
  30. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/nscale.py +0 -0
  31. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/openai.py +0 -0
  32. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/protocol.py +0 -0
  33. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/clients/together.py +0 -0
  34. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/generate.py +0 -0
  35. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/llm/prompts.py +0 -0
  36. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/py.typed +0 -0
  37. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/scripts/test_cuda.py +0 -0
  38. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_cgr/timeout.py +0 -0
  39. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_codegen_research.egg-info/dependency_links.txt +0 -0
  40. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_codegen_research.egg-info/entry_points.txt +0 -0
  41. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_codegen_research.egg-info/requires.txt +0 -0
  42. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/src/llm_codegen_research.egg-info/top_level.txt +0 -0
  43. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/tests/test_enums.py +0 -0
  44. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/tests/test_json_utils.py +0 -0
  45. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/tests/test_llm_api.py +0 -0
  46. {llm_codegen_research-2.12 → llm_codegen_research-2.14}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-codegen-research
3
- Version: 2.12
3
+ Version: 2.14
4
4
  Summary: Useful classes and methods for researching code-generation by LLMs.
5
5
  Author-email: Lukas Twist <itsluketwist@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -45,7 +45,9 @@ try:
45
45
  GenerationProtocol,
46
46
  Mistral_LLM,
47
47
  OpenAI_LLM,
48
+ OpenAI_Tool_LLM,
48
49
  TogetherAI_LLM,
50
+ Tool,
49
51
  generate,
50
52
  generate_bool,
51
53
  generate_list,
@@ -64,6 +66,8 @@ try:
64
66
  "GenerationProtocol",
65
67
  "Mistral_LLM",
66
68
  "OpenAI_LLM",
69
+ "OpenAI_Tool_LLM",
70
+ "Tool",
67
71
  "TogetherAI_LLM",
68
72
  "generate",
69
73
  "generate_bool",
@@ -5,7 +5,9 @@ from llm_cgr.llm.clients import (
5
5
  GenerationProtocol,
6
6
  Mistral_LLM,
7
7
  OpenAI_LLM,
8
+ OpenAI_Tool_LLM,
8
9
  TogetherAI_LLM,
10
+ Tool,
9
11
  get_llm,
10
12
  )
11
13
  from llm_cgr.llm.generate import generate, generate_bool, generate_list
@@ -24,6 +26,8 @@ __all__ = [
24
26
  "GenerationProtocol",
25
27
  "Mistral_LLM",
26
28
  "OpenAI_LLM",
29
+ "OpenAI_Tool_LLM",
30
+ "Tool",
27
31
  "TogetherAI_LLM",
28
32
  "get_llm",
29
33
  "generate",
@@ -6,6 +6,12 @@ from llm_cgr.llm.clients.deepseek import DeepSeek_LLM
6
6
  from llm_cgr.llm.clients.mistral import Mistral_LLM
7
7
  from llm_cgr.llm.clients.nscale import Nscale_LLM
8
8
  from llm_cgr.llm.clients.openai import OpenAI_LLM
9
+ from llm_cgr.llm.clients.openai_tool import (
10
+ MAX_TOOL_CALLS,
11
+ MAX_TOOL_ITERATIONS,
12
+ OpenAI_Tool_LLM,
13
+ Tool,
14
+ )
9
15
  from llm_cgr.llm.clients.protocol import GenerationProtocol
10
16
  from llm_cgr.llm.clients.together import TogetherAI_LLM
11
17
 
@@ -27,9 +33,15 @@ def get_llm(
27
33
  top_p: float | None = None,
28
34
  max_tokens: int | None = None,
29
35
  provider: str | None = None,
36
+ tools: list[Tool] | None = None,
37
+ max_tool_iterations: int = MAX_TOOL_ITERATIONS,
38
+ max_tool_calls: int = MAX_TOOL_CALLS,
30
39
  ) -> GenerationProtocol:
31
40
  """
32
41
  Initialise the correct LLM client for the given model.
42
+
43
+ If tools are provided, returns an OpenAI_Tool_LLM instance. Tool calls
44
+ are currently only supported for OpenAI models.
33
45
  """
34
46
  llm_class: type[Base_LLM]
35
47
  if provider is not None:
@@ -45,6 +57,23 @@ def get_llm(
45
57
  else:
46
58
  llm_class = TogetherAI_LLM
47
59
 
60
+ # if tools are requested, use the tool-enabled subclass (openai only for now)
61
+ if tools is not None:
62
+ if llm_class is not OpenAI_LLM:
63
+ raise NotImplementedError(
64
+ "Tool calls are only supported for OpenAI models."
65
+ )
66
+ return OpenAI_Tool_LLM(
67
+ tools=tools,
68
+ model=model,
69
+ system=system,
70
+ temperature=temperature,
71
+ top_p=top_p,
72
+ max_tokens=max_tokens,
73
+ max_tool_iterations=max_tool_iterations,
74
+ max_tool_calls=max_tool_calls,
75
+ )
76
+
48
77
  return llm_class(
49
78
  model=model,
50
79
  system=system,
@@ -60,7 +89,9 @@ __all__ = [
60
89
  "DeepSeek_LLM",
61
90
  "GenerationProtocol",
62
91
  "OpenAI_LLM",
92
+ "OpenAI_Tool_LLM",
63
93
  "TogetherAI_LLM",
64
94
  "Mistral_LLM",
95
+ "Tool",
65
96
  "get_llm",
66
97
  ]
@@ -0,0 +1,328 @@
1
+ """OpenAI client subclass with an agentic tool-call loop."""
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from typing import Any, Callable, cast
6
+
7
+ import openai
8
+ from openai.types.responses import ResponseFunctionToolCall, ResponseInputItemParam
9
+
10
+ from llm_cgr.llm.clients.openai import OpenAI_LLM
11
+
12
+
13
+ # maximum tool-call rounds allowed within a single generate() or chat() call
14
+ MAX_TOOL_ITERATIONS: int = 5
15
+
16
+ # maximum total tool calls allowed across the lifetime of a client instance
17
+ MAX_TOOL_CALLS: int = 10
18
+
19
+
20
@dataclass
class Tool:
    """
    Describes a single function the model is allowed to call while generating.

    Attributes:
        name: Identifier the model uses when it requests this tool.
        description: Plain-language summary of the tool's purpose; the model
            reads this when choosing whether to call the tool.
        parameters: JSON schema dict for the arguments the function takes.
        fn: Python callable run for each call; it is invoked with keyword
            arguments matching the schema and must return a str.
    """

    name: str
    description: str
    parameters: dict[str, Any]
    fn: Callable[..., str]
+
39
+
40
class OpenAI_Tool_LLM(OpenAI_LLM):
    """OpenAI client with an agentic tool-call loop.

    Tools are supplied at construction time and used for all subsequent
    generate() and chat() calls. The client handles the full loop internally:
    call the API, execute any tool calls, feed results back, repeat until the
    model produces a final text response.
    """

    def __init__(
        self,
        tools: list[Tool],
        model: str | None = None,
        system: str | None = None,
        temperature: float | None = None,
        top_p: float | None = None,
        max_tokens: int | None = None,
        max_tool_iterations: int = MAX_TOOL_ITERATIONS,
        max_tool_calls: int = MAX_TOOL_CALLS,
    ) -> None:
        """
        Initialise the OpenAI tool client.

        Requires the OPENAI_API_KEY environment variable to be set.
        max_tool_iterations caps tool-call rounds within a single request.
        max_tool_calls caps the cumulative total across all requests on this
        instance. When either limit is reached, the model is sent a message
        asking it to answer immediately without any further tool calls.
        """
        super().__init__(
            model=model,
            system=system,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
        )
        self._tools = tools
        self._max_tool_iterations = max_tool_iterations
        self._max_tool_calls = max_tool_calls
        # cumulative count of individual tool calls made by this instance
        self._tool_calls: int = 0

    @property
    def tool_calls(self) -> int:
        """Total number of tool calls handled by this client since instantiation.

        Returns the cumulative count across all generate() and chat() calls.
        Every call the model requests and the loop processes is counted,
        including calls rejected for an unknown tool name or bad arguments.
        Tip: record the value before a call and subtract to get the count for
        that specific call.
        """
        return self._tool_calls

    def _build_tool_param(
        self,
        tool: Tool,
    ) -> dict[str, Any]:
        """Convert a Tool dataclass to the dict format the OpenAI Responses API expects."""
        return {
            "type": "function",
            "name": tool.name,
            "description": tool.description,
            "parameters": tool.parameters,
        }

    def _force_final_answer(
        self,
        current_input: list[Any],
        model: str,
        temperature: float | None,
        top_p: float | None,
        max_tokens: int | None,
    ) -> str:
        """Force the model to produce a text answer after a limit is reached.

        Appends a user message telling the model it has used all its allowed
        tool calls, then calls the API one final time without any tools so the
        model cannot make further calls. Note that current_input is mutated
        in place (the extra user message is appended to it).

        Returns the model's final text response.
        """
        # tell the model it must answer now — no more tool calls are allowed
        current_input.append(
            self._build_message(
                role="user",
                content=(
                    "You have reached the maximum number of tool calls allowed. "
                    "Please provide your final answer now based on the information "
                    "you have gathered, without calling any more tools."
                ),
            )
        )
        response = self._client.responses.create(
            input=cast(list[ResponseInputItemParam], current_input),
            model=model,
            temperature=temperature if temperature is not None else openai.omit,
            top_p=top_p if top_p is not None else openai.omit,
            max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
            # no tools provided: the model cannot make further tool calls
        )
        return response.output_text

    def _execute_tool_call(
        self,
        call: ResponseFunctionToolCall,
        tool_map: dict[str, Tool],
    ) -> str:
        """Run the local function for one tool call and return its output.

        Mistakes made by the model (an unregistered tool name, arguments that
        are not valid JSON, or arguments that are not a JSON object) are
        returned as an error string instead of being raised, so the message
        can be fed back to the model and it has a chance to correct itself.
        Exceptions raised by the tool function itself are NOT caught: they
        indicate a problem in caller-supplied code and propagate unchanged.
        """
        tool = tool_map.get(call.name)
        if tool is None:
            available = ", ".join(sorted(tool_map)) or "none"
            return f"Error: unknown tool '{call.name}'. Available tools: {available}."

        try:
            # an empty argument string is treated as "no arguments"
            kwargs = json.loads(call.arguments or "{}")
        except json.JSONDecodeError as exc:
            return (
                f"Error: arguments for tool '{call.name}' are not valid JSON ({exc})."
            )

        # fn(**kwargs) needs a mapping; a JSON list/number/string cannot be splatted
        if not isinstance(kwargs, dict):
            return f"Error: arguments for tool '{call.name}' must be a JSON object."

        return tool.fn(**kwargs)

    def _run_tool_loop(
        self,
        messages: list[dict[str, Any]],
        model: str,
        temperature: float | None,
        top_p: float | None,
        max_tokens: int | None,
    ) -> str:
        """Run the agentic tool-call loop for a single turn.

        Calls the OpenAI API in a loop, executing any tool calls the model
        requests, until the model produces a final text response or a limit is
        reached. Two limits apply:
        - max_tool_iterations: rounds allowed within this single call.
        - max_tool_calls: cumulative total across all calls on this instance.
        When either limit is hit, _force_final_answer() is called, which tells
        the model to answer immediately without making any further tool calls.

        Returns the final text response.
        """
        # convert Tool dataclasses to the API's function-tool format
        api_tools = [self._build_tool_param(t) for t in self._tools]

        # build a name -> Tool lookup map for fast dispatch during the loop
        tool_map = {t.name: t for t in self._tools}

        # shallow copy so intermediate tool-call scaffolding never mutates the
        # caller's message list (prevents corruption of the chat history).
        # typed as list[Any] so we can freely append both plain message dicts
        # and the richer tool-call dicts without fighting the type checker.
        current_input: list[Any] = list(messages)

        for _ in range(self._max_tool_iterations):
            response = self._client.responses.create(
                input=cast(list[ResponseInputItemParam], current_input),
                model=model,
                temperature=temperature if temperature is not None else openai.omit,
                top_p=top_p if top_p is not None else openai.omit,
                max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
                tools=cast(Any, api_tools),
            )

            # collect any function calls the model requested in this response
            function_calls = [
                item for item in response.output if item.type == "function_call"
            ]

            # no tool calls means the model has produced its final text answer
            if not function_calls:
                return response.output_text

            # check the overall cumulative limit before processing these calls.
            # if adding them would exceed the limit, force a final answer now
            # without executing any of the pending tool calls.
            if self._tool_calls + len(function_calls) > self._max_tool_calls:
                return self._force_final_answer(
                    current_input=current_input,
                    model=model,
                    temperature=temperature,
                    top_p=top_p,
                    max_tokens=max_tokens,
                )

            # increment the cumulative counter; parallel calls count individually
            self._tool_calls += len(function_calls)

            # process each tool call: the OpenAI Responses API requires that the
            # function_call item appears in the next input before its matching
            # function_call_output item
            for _call in function_calls:
                # cast to the concrete type so we can access .call_id/.name/.arguments
                call = cast(ResponseFunctionToolCall, _call)

                # append the function_call itself so the model sees what it called
                current_input.append(
                    {
                        "type": "function_call",
                        "call_id": call.call_id,
                        "name": call.name,
                        "arguments": call.arguments,
                    }
                )

                # run the local function (or build an error message the model
                # can act on if its request was malformed)
                result = self._execute_tool_call(call=call, tool_map=tool_map)

                # append the result so the model can read it on the next turn
                current_input.append(
                    {
                        "type": "function_call_output",
                        "call_id": call.call_id,
                        "output": result,
                    }
                )

            # loop continues: enriched input is sent back to the model

        # max_tool_iterations exhausted — force the model to answer now
        return self._force_final_answer(
            current_input=current_input,
            model=model,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
        )

    def generate(
        self,
        user: str,
        system: str | None = None,
        model: str | None = None,
        samples: int = 1,
        temperature: float | None = None,
        top_p: float | None = None,
        max_tokens: int | None = None,
    ) -> list[str]:
        """Generate model responses via the agentic tool-call loop.

        Runs the tool loop once per requested sample and returns the final
        text of each run. Per-call temperature / top_p / max_tokens override
        the instance defaults whenever they are not None.

        Raises:
            ValueError: if no model is given here or at construction time.
        """
        _model = model or self._model
        if _model is None:
            raise ValueError("Model must be specified for LLM APIs.")

        messages = self._build_input(
            user=user,
            system=system or self._system,
        )

        # compare against None rather than using `or`, so that an explicit
        # falsy override such as temperature=0.0 is honoured
        _temperature = temperature if temperature is not None else self._temperature
        _top_p = top_p if top_p is not None else self._top_p
        _max_tokens = max_tokens if max_tokens is not None else self._max_tokens

        _generations = []
        for _ in range(samples):
            result = self._run_tool_loop(
                messages=messages,
                model=_model,
                temperature=_temperature,
                top_p=_top_p,
                max_tokens=_max_tokens,
            )
            _generations.append(result)

        return _generations

    def chat(
        self,
        user: str,
        system: str | None = None,
        model: str | None = None,
        temperature: float | None = None,
        top_p: float | None = None,
        max_tokens: int | None = None,
    ) -> str:
        """Run a chat turn via the agentic tool-call loop.

        The user message is added to self._history, the tool loop is run on
        that history, and only the final text response is appended to the
        history — not intermediate tool-call scaffolding. The system argument
        is only used when the history is first created. Per-call temperature /
        top_p / max_tokens override the instance defaults whenever they are
        not None.

        Raises:
            ValueError: if no model is given here or at construction time.
        """
        _model = model or self._model
        if _model is None:
            raise ValueError("Model must be specified for LLM APIs.")

        if self._history is None:
            self._history = self._build_input(
                user=user,
                system=system or self._system,
            )
        else:
            self._history.append(
                self._build_message(
                    role="user",
                    content=user,
                )
            )

        # _run_tool_loop operates on a shallow copy of self._history, so
        # intermediate tool-call items never appear in the chat history.
        # overrides are compared against None rather than using `or`, so that
        # an explicit falsy value such as temperature=0.0 is honoured
        response = self._run_tool_loop(
            messages=self._history,
            model=_model,
            temperature=(
                temperature if temperature is not None else self._temperature
            ),
            top_p=top_p if top_p is not None else self._top_p,
            max_tokens=max_tokens if max_tokens is not None else self._max_tokens,
        )

        self._history.append(
            self._build_message(
                role="assistant",
                content=response,
            )
        )
        return response
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-codegen-research
3
- Version: 2.12
3
+ Version: 2.14
4
4
  Summary: Useful classes and methods for researching code-generation by LLMs.
5
5
  Author-email: Lukas Twist <itsluketwist@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -26,6 +26,7 @@ src/llm_cgr/llm/clients/deepseek.py
26
26
  src/llm_cgr/llm/clients/mistral.py
27
27
  src/llm_cgr/llm/clients/nscale.py
28
28
  src/llm_cgr/llm/clients/openai.py
29
+ src/llm_cgr/llm/clients/openai_tool.py
29
30
  src/llm_cgr/llm/clients/protocol.py
30
31
  src/llm_cgr/llm/clients/together.py
31
32
  src/llm_cgr/scripts/test_cuda.py
@@ -39,4 +40,5 @@ tests/test_enums.py
39
40
  tests/test_json_utils.py
40
41
  tests/test_llm_api.py
41
42
  tests/test_llm_local.py
43
+ tests/test_llm_tool.py
42
44
  tests/test_utils.py
@@ -10,8 +10,10 @@ from llm_cgr import (
10
10
  Mistral_LLM,
11
11
  OpenAI_LLM,
12
12
  TogetherAI_LLM,
13
+ Tool,
13
14
  generate_bool,
14
15
  generate_list,
16
+ get_llm,
15
17
  )
16
18
 
17
19
 
@@ -129,6 +131,30 @@ def test_build_input():
129
131
  ]
130
132
 
131
133
 
134
def test_tools_unsupported_provider():
    """
    Check that get_llm() rejects tools when the model is not an OpenAI one.

    The NotImplementedError is raised inside get_llm() itself, so the test
    never reaches the point of making a network request.
    """

    def _placeholder_fn() -> str:
        return "result"

    placeholder = Tool(
        name="dummy",
        description="A dummy tool.",
        parameters={"type": "object", "properties": {}},
        fn=_placeholder_fn,
    )
    expected_message = "Tool calls are only supported for OpenAI models."

    # a claude model name is used here: tool calls are not yet supported for it
    with pytest.raises(NotImplementedError, match=expected_message):
        get_llm(model="claude-3-5-haiku-20241022", tools=[placeholder])
156
+
157
+
132
158
  @pytest.mark.parametrize(
133
159
  "response,error",
134
160
  [
@@ -0,0 +1,96 @@
1
+ """Test the OpenAI_Tool_LLM agentic tool-call loop."""
2
+
3
+ import pytest
4
+
5
+ from llm_cgr import OpenAI_Tool_LLM, Tool
6
+
7
+
8
+ # mark all tests in this file as api tests, so they can be excluded in ci
9
+ pytestmark = pytest.mark.api
10
+
11
+
12
def test_tool_call_generate(openai_model):
    """
    Check that generate() on the OpenAI tool client drives the agentic loop
    and comes back with the answer produced by the tool.

    The prompt asks the model to use an addition tool; the tool-call counter
    on the client is checked afterwards to confirm a tool call took place.
    """

    def add(a: int, b: int) -> str:
        """Return the sum of two integers as a string."""
        return str(a + b)

    schema = {
        "type": "object",
        "properties": {
            "a": {"type": "integer", "description": "The first integer."},
            "b": {"type": "integer", "description": "The second integer."},
        },
        "required": ["a", "b"],
        "additionalProperties": False,
    }
    client = OpenAI_Tool_LLM(
        tools=[
            Tool(
                name="add",
                description="Add two integers together and return the result.",
                parameters=schema,
                fn=add,
            )
        ],
        model=openai_model,
    )

    outputs = client.generate(
        user="Use the add tool to compute 3 + 4. What is the result?"
    )

    # exactly one sample is requested by default
    assert isinstance(outputs, list)
    assert len(outputs) == 1

    answer = outputs[0]
    assert isinstance(answer, str)
    assert len(answer) > 0
    # the answer must contain the correct sum, and a tool call must be recorded
    assert "7" in answer
    assert client.tool_calls >= 1
55
+
56
+
57
def test_tool_call_chat(openai_model):
    """
    Check that tool calls work within a chat session and that none of the
    intermediate tool-call scaffolding ends up in the chat history.
    """

    def multiply(a: int, b: int) -> str:
        """Return the product of two integers as a string."""
        return str(a * b)

    schema = {
        "type": "object",
        "properties": {
            "a": {"type": "integer", "description": "The first integer."},
            "b": {"type": "integer", "description": "The second integer."},
        },
        "required": ["a", "b"],
        "additionalProperties": False,
    }
    client = OpenAI_Tool_LLM(
        tools=[
            Tool(
                name="multiply",
                description="Multiply two integers together and return the result.",
                parameters=schema,
                fn=multiply,
            )
        ],
        model=openai_model,
    )

    reply = client.chat(
        user="Use the multiply tool to compute 6 * 7. What is the result?"
    )

    assert isinstance(reply, str)
    assert "42" in reply

    # the history must hold just the user turn followed by the final assistant
    # reply; function_call / function_call_output items must not leak into it
    turns = client.history
    assert len(turns) == 2
    assert turns[0]["role"] == "user"
    assert turns[1]["role"] == "assistant"
    assert client.tool_calls >= 1