llm-codegen-research 2.13__tar.gz → 2.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/PKG-INFO +1 -1
  2. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/__init__.py +10 -1
  3. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/anthropic.py +2 -2
  4. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/base.py +33 -15
  5. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/deepseek.py +12 -4
  6. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/mistral.py +2 -2
  7. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/nscale.py +2 -2
  8. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/openai.py +2 -2
  9. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/openai_tool.py +77 -7
  10. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/protocol.py +2 -2
  11. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/together.py +2 -2
  12. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/generate.py +4 -1
  13. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/PKG-INFO +1 -1
  14. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/SOURCES.txt +1 -0
  15. llm_codegen_research-2.15/tests/test_llm_deepseek_reasoning.py +136 -0
  16. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/LICENSE +0 -0
  17. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/README.md +0 -0
  18. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/pyproject.toml +0 -0
  19. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/setup.cfg +0 -0
  20. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/__init__.py +0 -0
  21. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/analyse/__init__.py +0 -0
  22. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/analyse/classes.py +0 -0
  23. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/__init__.py +0 -0
  24. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/code_data.py +0 -0
  25. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/javascript.py +0 -0
  26. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/python.py +0 -0
  27. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/rust.py +0 -0
  28. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/analyse/regexes.py +0 -0
  29. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/decorators.py +0 -0
  30. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/defaults.py +0 -0
  31. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/enums.py +0 -0
  32. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/json_utils.py +0 -0
  33. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/__init__.py +0 -0
  34. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/llm/prompts.py +0 -0
  35. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/py.typed +0 -0
  36. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/scripts/test_cuda.py +0 -0
  37. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_cgr/timeout.py +0 -0
  38. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/dependency_links.txt +0 -0
  39. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/entry_points.txt +0 -0
  40. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/requires.txt +0 -0
  41. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/top_level.txt +0 -0
  42. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/tests/test_enums.py +0 -0
  43. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/tests/test_json_utils.py +0 -0
  44. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/tests/test_llm_api.py +0 -0
  45. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/tests/test_llm_local.py +0 -0
  46. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/tests/test_llm_tool.py +0 -0
  47. {llm_codegen_research-2.13 → llm_codegen_research-2.15}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-codegen-research
3
- Version: 2.13
3
+ Version: 2.15
4
4
  Summary: Useful classes and methods for researching code-generation by LLMs.
5
5
  Author-email: Lukas Twist <itsluketwist@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -6,7 +6,12 @@ from llm_cgr.llm.clients.deepseek import DeepSeek_LLM
6
6
  from llm_cgr.llm.clients.mistral import Mistral_LLM
7
7
  from llm_cgr.llm.clients.nscale import Nscale_LLM
8
8
  from llm_cgr.llm.clients.openai import OpenAI_LLM
9
- from llm_cgr.llm.clients.openai_tool import OpenAI_Tool_LLM, Tool
9
+ from llm_cgr.llm.clients.openai_tool import (
10
+ MAX_TOOL_CALLS,
11
+ MAX_TOOL_ITERATIONS,
12
+ OpenAI_Tool_LLM,
13
+ Tool,
14
+ )
10
15
  from llm_cgr.llm.clients.protocol import GenerationProtocol
11
16
  from llm_cgr.llm.clients.together import TogetherAI_LLM
12
17
 
@@ -29,6 +34,8 @@ def get_llm(
29
34
  max_tokens: int | None = None,
30
35
  provider: str | None = None,
31
36
  tools: list[Tool] | None = None,
37
+ max_tool_iterations: int = MAX_TOOL_ITERATIONS,
38
+ max_tool_calls: int = MAX_TOOL_CALLS,
32
39
  ) -> GenerationProtocol:
33
40
  """
34
41
  Initialise the correct LLM client for the given model.
@@ -63,6 +70,8 @@ def get_llm(
63
70
  temperature=temperature,
64
71
  top_p=top_p,
65
72
  max_tokens=max_tokens,
73
+ max_tool_iterations=max_tool_iterations,
74
+ max_tool_calls=max_tool_calls,
66
75
  )
67
76
 
68
77
  return llm_class(
@@ -66,7 +66,7 @@ class Anthropic_LLM(Base_LLM):
66
66
  temperature: float | None = None,
67
67
  top_p: float | None = None,
68
68
  max_tokens: int | None = None,
69
- ) -> str:
69
+ ) -> tuple[str, str | None]:
70
70
  """Generate a model response from the Anthropic API."""
71
71
  response = self._client.messages.create(
72
72
  model=model,
@@ -77,4 +77,4 @@ class Anthropic_LLM(Base_LLM):
77
77
  max_tokens=max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS,
78
78
  )
79
79
  # cast to TextBlock as non-tool, non-thinking requests always return text
80
- return cast(TextBlock, response.content[0]).text
80
+ return cast(TextBlock, response.content[0]).text, None
@@ -1,3 +1,5 @@
1
+ """Base class for LLM API clients."""
2
+
1
3
  from abc import ABC, abstractmethod
2
4
  from typing import Any
3
5
 
@@ -12,9 +14,13 @@ class Base_LLM(ABC):
12
14
  temperature: float | None = None,
13
15
  top_p: float | None = None,
14
16
  max_tokens: int | None = None,
17
+ enable_reasoning: bool = False,
15
18
  ) -> None:
16
19
  """
17
20
  Initialise the LLM client.
21
+
22
+ When enable_reasoning is True, generate() and chat() include chain-of-thought
23
+ alongside responses, and reasoning is stored in the chat history.
18
24
  """
19
25
  self._model = model
20
26
  self._system = system
@@ -24,6 +30,7 @@ class Base_LLM(ABC):
24
30
  self._top_p = top_p
25
31
  self._max_tokens = max_tokens
26
32
 
33
+ self._enable_reasoning = enable_reasoning
27
34
  self._history: list[dict[str, Any]] | None = None
28
35
 
29
36
  def generate(
@@ -35,9 +42,12 @@ class Base_LLM(ABC):
35
42
  temperature: float | None = None,
36
43
  top_p: float | None = None,
37
44
  max_tokens: int | None = None,
38
- ) -> list[str]:
45
+ ) -> list[str] | list[tuple[str, str | None]]:
39
46
  """
40
47
  Generate model responses from the LLMs API.
48
+
49
+ When enable_reasoning is True, returns a list of (response, reasoning) tuples.
50
+ When False, returns a list of response strings.
41
51
  """
42
52
  _model = model or self._model
43
53
  if _model is None:
@@ -48,16 +58,19 @@ class Base_LLM(ABC):
48
58
  system=system or self._system,
49
59
  )
50
60
 
51
- _generations = []
61
+ _generations: list[Any] = []
52
62
  for _ in range(samples):
53
- response = self._get_response(
63
+ response, reasoning = self._get_response(
54
64
  input=messages,
55
65
  model=_model,
56
66
  temperature=temperature or self._temperature,
57
67
  top_p=top_p or self._top_p,
58
68
  max_tokens=max_tokens or self._max_tokens,
59
69
  )
60
- _generations.append(response)
70
+ if self._enable_reasoning:
71
+ _generations.append((response, reasoning))
72
+ else:
73
+ _generations.append(response)
61
74
 
62
75
  return _generations
63
76
 
@@ -69,9 +82,12 @@ class Base_LLM(ABC):
69
82
  temperature: float | None = None,
70
83
  top_p: float | None = None,
71
84
  max_tokens: int | None = None,
72
- ) -> str:
85
+ ) -> str | tuple[str, str | None]:
73
86
  """
74
87
  Generate a model response from the LLMs API, in the ongoing chat.
88
+
89
+ When enable_reasoning is True, reasoning is stored in the history and the
90
+ return value is a (response, reasoning) tuple instead of a plain string.
75
91
  """
76
92
  _model = model or self._model
77
93
  if _model is None:
@@ -92,7 +108,7 @@ class Base_LLM(ABC):
92
108
  )
93
109
  )
94
110
 
95
- response = self._get_response(
111
+ response, reasoning = self._get_response(
96
112
  input=self._history,
97
113
  system=system,
98
114
  model=_model,
@@ -101,13 +117,14 @@ class Base_LLM(ABC):
101
117
  max_tokens=max_tokens or self._max_tokens,
102
118
  )
103
119
 
104
- # update the history and return
105
- self._history.append(
106
- self._build_message(
107
- role="assistant",
108
- content=response,
109
- )
110
- )
120
+ # build the assistant history entry, attaching reasoning if present
121
+ assistant_message = self._build_message(role="assistant", content=response)
122
+ if self._enable_reasoning and reasoning is not None:
123
+ assistant_message["reasoning_content"] = reasoning
124
+ self._history.append(assistant_message)
125
+
126
+ if self._enable_reasoning:
127
+ return response, reasoning
111
128
  return response
112
129
 
113
130
  @property
@@ -146,9 +163,10 @@ class Base_LLM(ABC):
146
163
  temperature: float | None = None,
147
164
  top_p: float | None = None,
148
165
  max_tokens: int | None = None,
149
- ) -> str:
166
+ ) -> tuple[str, str | None]:
150
167
  """
151
168
  Generate a model response from the LLM API.
152
169
 
153
- Returns the text response to the prompt.
170
+ Returns a (response, reasoning) tuple; reasoning is None for models that
171
+ do not produce chain-of-thought output.
154
172
  """
@@ -1,4 +1,4 @@
1
- """Class to access LLMs via the OpenAI API."""
1
+ """Class to access LLMs via the DeepSeek API."""
2
2
 
3
3
  import os
4
4
  from typing import Any, cast
@@ -19,11 +19,13 @@ class DeepSeek_LLM(Base_LLM):
19
19
  temperature: float | None = None,
20
20
  top_p: float | None = None,
21
21
  max_tokens: int | None = None,
22
+ enable_reasoning: bool = False,
22
23
  ) -> None:
23
24
  """
24
25
  Initialise the DeepSeek client.
25
26
 
26
27
  Requires the DEEPSEEK_API_KEY environment variable to be set.
28
+ Set enable_reasoning=True when using a reasoning model (e.g. deepseek-reasoner).
27
29
  """
28
30
  super().__init__(
29
31
  model=model,
@@ -31,6 +33,7 @@ class DeepSeek_LLM(Base_LLM):
31
33
  temperature=temperature,
32
34
  top_p=top_p,
33
35
  max_tokens=max_tokens,
36
+ enable_reasoning=enable_reasoning,
34
37
  )
35
38
  self._client = openai.OpenAI(
36
39
  api_key=os.environ["DEEPSEEK_API_KEY"],
@@ -65,8 +68,8 @@ class DeepSeek_LLM(Base_LLM):
65
68
  temperature: float | None = None,
66
69
  top_p: float | None = None,
67
70
  max_tokens: int | None = None,
68
- ) -> str:
69
- """Generate a model response from the OpenAI API."""
71
+ ) -> tuple[str, str | None]:
72
+ """Generate a model response from the DeepSeek API."""
70
73
  response = self._client.chat.completions.create(
71
74
  messages=cast(list[ChatCompletionMessageParam], input),
72
75
  model=model,
@@ -74,5 +77,10 @@ class DeepSeek_LLM(Base_LLM):
74
77
  top_p=top_p if top_p is not None else openai.omit,
75
78
  max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
76
79
  )
80
+ message = response.choices[0].message
81
+
82
+ # chain-of-thought from reasoning models (e.g. deepseek-reasoner); None otherwise
83
+ reasoning = getattr(message, "reasoning_content", None)
84
+
77
85
  # cast to str as text completions always return string content
78
- return cast(str, response.choices[0].message.content)
86
+ return cast(str, message.content), reasoning
@@ -66,7 +66,7 @@ class Mistral_LLM(Base_LLM):
66
66
  temperature: float | None = None,
67
67
  top_p: float | None = None,
68
68
  max_tokens: int | None = None,
69
- ) -> str:
69
+ ) -> tuple[str, str | None]:
70
70
  """Generate a model response from the MistralAI API."""
71
71
  response = self._client.chat.complete(
72
72
  model=model,
@@ -75,4 +75,4 @@ class Mistral_LLM(Base_LLM):
75
75
  top_p=top_p,
76
76
  max_tokens=max_tokens if max_tokens is not None else client.UNSET,
77
77
  )
78
- return response.choices[0].message.content
78
+ return response.choices[0].message.content, None
@@ -65,7 +65,7 @@ class Nscale_LLM(Base_LLM):
65
65
  temperature: float | None = None,
66
66
  top_p: float | None = None,
67
67
  max_tokens: int | None = None,
68
- ) -> str:
68
+ ) -> tuple[str, str | None]:
69
69
  """Generate a model response from the OpenAI API."""
70
70
  response = self._client.chat.completions.create(
71
71
  messages=cast(list[ChatCompletionMessageParam], input),
@@ -75,4 +75,4 @@ class Nscale_LLM(Base_LLM):
75
75
  max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
76
76
  )
77
77
  # cast to str as text completions always return string content
78
- return cast(str, response.choices[0].message.content)
78
+ return cast(str, response.choices[0].message.content), None
@@ -61,7 +61,7 @@ class OpenAI_LLM(Base_LLM):
61
61
  temperature: int | float | None = None,
62
62
  top_p: int | float | None = None,
63
63
  max_tokens: int | None = None,
64
- ) -> str:
64
+ ) -> tuple[str, str | None]:
65
65
  """Generate a model response from the OpenAI API."""
66
66
  self._client.responses.input_items
67
67
  response = self._client.responses.create(
@@ -71,4 +71,4 @@ class OpenAI_LLM(Base_LLM):
71
71
  top_p=top_p if top_p is not None else openai.omit,
72
72
  max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
73
73
  )
74
- return response.output_text
74
+ return response.output_text, None
@@ -10,8 +10,11 @@ from openai.types.responses import ResponseFunctionToolCall, ResponseInputItemPa
10
10
  from llm_cgr.llm.clients.openai import OpenAI_LLM
11
11
 
12
12
 
13
- # maximum number of tool-call iterations per request, to prevent runaway loops
14
- MAX_TOOL_ITERATIONS: int = 10
13
+ # maximum tool-call rounds allowed within a single generate() or chat() call
14
+ MAX_TOOL_ITERATIONS: int = 5
15
+
16
+ # maximum total tool calls allowed across the lifetime of a client instance
17
+ MAX_TOOL_CALLS: int = 10
15
18
 
16
19
 
17
20
  @dataclass
@@ -51,11 +54,17 @@ class OpenAI_Tool_LLM(OpenAI_LLM):
51
54
  temperature: float | None = None,
52
55
  top_p: float | None = None,
53
56
  max_tokens: int | None = None,
57
+ max_tool_iterations: int = MAX_TOOL_ITERATIONS,
58
+ max_tool_calls: int = MAX_TOOL_CALLS,
54
59
  ) -> None:
55
60
  """
56
61
  Initialise the OpenAI tool client.
57
62
 
58
63
  Requires the OPENAI_API_KEY environment variable to be set.
64
+ max_tool_iterations caps tool-call rounds within a single request.
65
+ max_tool_calls caps the cumulative total across all requests on this
66
+ instance. When either limit is reached, the model is sent a message
67
+ asking it to answer immediately without any further tool calls.
59
68
  """
60
69
  super().__init__(
61
70
  model=model,
@@ -65,6 +74,8 @@ class OpenAI_Tool_LLM(OpenAI_LLM):
65
74
  max_tokens=max_tokens,
66
75
  )
67
76
  self._tools = tools
77
+ self._max_tool_iterations = max_tool_iterations
78
+ self._max_tool_calls = max_tool_calls
68
79
  # cumulative count of individual tool calls made by this instance
69
80
  self._tool_calls: int = 0
70
81
 
@@ -90,6 +101,43 @@ class OpenAI_Tool_LLM(OpenAI_LLM):
90
101
  "parameters": tool.parameters,
91
102
  }
92
103
 
104
+ def _force_final_answer(
105
+ self,
106
+ current_input: list[Any],
107
+ model: str,
108
+ temperature: float | None,
109
+ top_p: float | None,
110
+ max_tokens: int | None,
111
+ ) -> str:
112
+ """Force the model to produce a text answer after a limit is reached.
113
+
114
+ Appends a user message telling the model it has used all its allowed
115
+ tool calls, then calls the API one final time without any tools so the
116
+ model cannot make further calls.
117
+
118
+ Returns the model's final text response.
119
+ """
120
+ # tell the model it must answer now — no more tool calls are allowed
121
+ current_input.append(
122
+ self._build_message(
123
+ role="user",
124
+ content=(
125
+ "You have reached the maximum number of tool calls allowed. "
126
+ "Please provide your final answer now based on the information "
127
+ "you have gathered, without calling any more tools."
128
+ ),
129
+ )
130
+ )
131
+ response = self._client.responses.create(
132
+ input=cast(list[ResponseInputItemParam], current_input),
133
+ model=model,
134
+ temperature=temperature if temperature is not None else openai.omit,
135
+ top_p=top_p if top_p is not None else openai.omit,
136
+ max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
137
+ # no tools provided: the model cannot make further tool calls
138
+ )
139
+ return response.output_text
140
+
93
141
  def _run_tool_loop(
94
142
  self,
95
143
  messages: list[dict[str, Any]],
@@ -101,8 +149,12 @@ class OpenAI_Tool_LLM(OpenAI_LLM):
101
149
  """Run the agentic tool-call loop for a single turn.
102
150
 
103
151
  Calls the OpenAI API in a loop, executing any tool calls the model
104
- requests, until the model produces a final text response or the
105
- MAX_TOOL_ITERATIONS safety limit is reached.
152
+ requests, until the model produces a final text response or a limit is
153
+ reached. Two limits apply:
154
+ - max_tool_iterations: rounds allowed within this single call.
155
+ - max_tool_calls: cumulative total across all calls on this instance.
156
+ When either limit is hit, _force_final_answer() is called, which tells
157
+ the model to answer immediately without making any further tool calls.
106
158
 
107
159
  Returns the final text response.
108
160
  """
@@ -118,7 +170,7 @@ class OpenAI_Tool_LLM(OpenAI_LLM):
118
170
  # and the richer tool-call dicts without fighting the type checker.
119
171
  current_input: list[Any] = list(messages)
120
172
 
121
- for _ in range(MAX_TOOL_ITERATIONS):
173
+ for _ in range(self._max_tool_iterations):
122
174
  response = self._client.responses.create(
123
175
  input=cast(list[ResponseInputItemParam], current_input),
124
176
  model=model,
@@ -137,6 +189,18 @@ class OpenAI_Tool_LLM(OpenAI_LLM):
137
189
  if not function_calls:
138
190
  return response.output_text
139
191
 
192
+ # check the overall cumulative limit before processing these calls.
193
+ # if adding them would exceed the limit, force a final answer now
194
+ # without executing any of the pending tool calls.
195
+ if self._tool_calls + len(function_calls) > self._max_tool_calls:
196
+ return self._force_final_answer(
197
+ current_input=current_input,
198
+ model=model,
199
+ temperature=temperature,
200
+ top_p=top_p,
201
+ max_tokens=max_tokens,
202
+ )
203
+
140
204
  # increment the cumulative counter; parallel calls count individually
141
205
  self._tool_calls += len(function_calls)
142
206
 
@@ -172,8 +236,14 @@ class OpenAI_Tool_LLM(OpenAI_LLM):
172
236
 
173
237
  # loop continues: enriched input is sent back to the model
174
238
 
175
- # safety fallback: return whatever text the model produced on the last turn
176
- return response.output_text
239
+ # max_tool_iterations exhausted — force the model to answer now
240
+ return self._force_final_answer(
241
+ current_input=current_input,
242
+ model=model,
243
+ temperature=temperature,
244
+ top_p=top_p,
245
+ max_tokens=max_tokens,
246
+ )
177
247
 
178
248
  def generate(
179
249
  self,
@@ -17,7 +17,7 @@ class GenerationProtocol(Protocol):
17
17
  temperature: float | None = None,
18
18
  top_p: float | None = None,
19
19
  max_tokens: int | None = None,
20
- ) -> list[str]:
20
+ ) -> list[str] | list[tuple[str, str | None]]:
21
21
  """
22
22
  Generate model responses from the LLMs API.
23
23
  """
@@ -30,7 +30,7 @@ class GenerationProtocol(Protocol):
30
30
  temperature: float | None = None,
31
31
  top_p: float | None = None,
32
32
  max_tokens: int | None = None,
33
- ) -> str:
33
+ ) -> str | tuple[str, str | None]:
34
34
  """
35
35
  Generate a model response from the LLMs API, in the ongoing chat.
36
36
  """
@@ -60,7 +60,7 @@ class TogetherAI_LLM(Base_LLM):
60
60
  temperature: float | None = None,
61
61
  top_p: float | None = None,
62
62
  max_tokens: int | None = None,
63
- ) -> str:
63
+ ) -> tuple[str, str | None]:
64
64
  """Generate a model response from the TogetherAI API."""
65
65
  response = self._client.chat.completions.create(
66
66
  model=model,
@@ -72,4 +72,4 @@ class TogetherAI_LLM(Base_LLM):
72
72
  # cast to Any first as together doesn't publicly export the message type,
73
73
  # then cast content to str as text completions always have it set
74
74
  message = cast(Any, response.choices[0].message)
75
- return cast(str, message.content)
75
+ return cast(str, message.content), None
@@ -1,5 +1,7 @@
1
1
  """API utilities for interfacing with the generation models."""
2
2
 
3
+ from typing import cast
4
+
3
5
  from llm_cgr.defaults import DEFAULT_MODEL
4
6
  from llm_cgr.llm.clients import get_llm
5
7
  from llm_cgr.llm.prompts import BOOL_SYSTEM_PROMPT, LIST_SYSTEM_PROMPT
@@ -31,7 +33,8 @@ def generate(
31
33
  max_tokens=max_tokens,
32
34
  **generate_kwargs,
33
35
  )
34
- return result
36
+ # enable_reasoning is False by default, so result is always a plain string
37
+ return cast(str, result)
35
38
 
36
39
 
37
40
  def generate_list(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-codegen-research
3
- Version: 2.13
3
+ Version: 2.15
4
4
  Summary: Useful classes and methods for researching code-generation by LLMs.
5
5
  Author-email: Lukas Twist <itsluketwist@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -39,6 +39,7 @@ src/llm_codegen_research.egg-info/top_level.txt
39
39
  tests/test_enums.py
40
40
  tests/test_json_utils.py
41
41
  tests/test_llm_api.py
42
+ tests/test_llm_deepseek_reasoning.py
42
43
  tests/test_llm_local.py
43
44
  tests/test_llm_tool.py
44
45
  tests/test_utils.py
@@ -0,0 +1,136 @@
1
+ """Tests for DeepSeek reasoning model support."""
2
+
3
+ import pytest
4
+
5
+ from llm_cgr.llm.clients.deepseek import DeepSeek_LLM
6
+
7
+
8
+ # mark all tests in this file as api tests, so they can be excluded in ci
9
+ pytestmark = pytest.mark.api
10
+
11
+ # standard model returns no chain-of-thought; reasoner model does
12
+ CHAT_MODEL = "deepseek-chat"
13
+ REASONER_MODEL = "deepseek-reasoner"
14
+
15
+ USER_PROMPT = "How many r's are in 'strawberry'?"
16
+
17
+
18
+ def test_generate_no_reasoning():
19
+ """
20
+ Test that generate returns plain strings when enable_reasoning is False (default).
21
+ """
22
+ llm = DeepSeek_LLM(model=CHAT_MODEL)
23
+ results = llm.generate(user=USER_PROMPT)
24
+
25
+ assert isinstance(results, list)
26
+ assert len(results) == 1
27
+ # result should be a plain string, not a tuple
28
+ assert isinstance(results[0], str)
29
+ assert len(results[0]) > 0
30
+
31
+
32
+ def test_generate_with_reasoning_returns_tuples():
33
+ """
34
+ Test that generate returns (response, reasoning) tuples when enable_reasoning is True.
35
+ """
36
+ llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
37
+ results = llm.generate(user=USER_PROMPT)
38
+
39
+ assert isinstance(results, list)
40
+ assert len(results) == 1
41
+
42
+ response, reasoning = results[0]
43
+
44
+ # response should be a non-empty string
45
+ assert isinstance(response, str)
46
+ assert len(response) > 0
47
+
48
+ # the reasoner model should always produce chain-of-thought
49
+ assert isinstance(reasoning, str)
50
+ assert len(reasoning) > 0
51
+
52
+
53
+ def test_generate_non_reasoning_model_has_no_reasoning():
54
+ """
55
+ Test that a standard (non-reasoner) model returns None for reasoning even when enabled.
56
+ """
57
+ llm = DeepSeek_LLM(model=CHAT_MODEL, enable_reasoning=True)
58
+ results = llm.generate(user=USER_PROMPT)
59
+
60
+ response, reasoning = results[0]
61
+
62
+ assert isinstance(response, str)
63
+ assert len(response) > 0
64
+ # deepseek-chat does not produce reasoning content
65
+ assert reasoning is None
66
+
67
+
68
+ def test_chat_no_reasoning():
69
+ """
70
+ Test that chat returns a plain string and history has no reasoning_content
71
+ when enable_reasoning is False (default).
72
+ """
73
+ llm = DeepSeek_LLM(model=CHAT_MODEL)
74
+ response = llm.chat(user=USER_PROMPT)
75
+
76
+ assert isinstance(response, str)
77
+ assert len(response) > 0
78
+
79
+ # history entries should each have exactly role and content
80
+ history = llm.history
81
+ assert all("reasoning_content" not in msg for msg in history)
82
+
83
+
84
+ def test_chat_with_reasoning_returns_tuple():
85
+ """
86
+ Test that chat returns a (response, reasoning) tuple when enable_reasoning is True.
87
+ """
88
+ llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
89
+ result = llm.chat(user=USER_PROMPT)
90
+
91
+ assert isinstance(result, tuple)
92
+ response, reasoning = result
93
+
94
+ assert isinstance(response, str)
95
+ assert len(response) > 0
96
+
97
+ assert isinstance(reasoning, str)
98
+ assert len(reasoning) > 0
99
+
100
+
101
+ def test_chat_reasoning_stored_in_history():
102
+ """
103
+ Test that reasoning is stored on the assistant history entry when enable_reasoning is True.
104
+ """
105
+ llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
106
+ llm.chat(user=USER_PROMPT)
107
+
108
+ history = llm.history
109
+ # find the assistant message
110
+ assistant_msgs = [msg for msg in history if msg["role"] == "assistant"]
111
+ assert len(assistant_msgs) == 1
112
+
113
+ assistant_msg = assistant_msgs[0]
114
+ assert "reasoning_content" in assistant_msg
115
+ assert isinstance(assistant_msg["reasoning_content"], str)
116
+ assert len(assistant_msg["reasoning_content"]) > 0
117
+
118
+
119
+ def test_chat_multi_turn_reasoning_stored_per_turn():
120
+ """
121
+ Test that reasoning is captured and stored for each turn in a multi-turn chat.
122
+ """
123
+ llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
124
+
125
+ llm.chat(user="What is 2 + 2?")
126
+ llm.chat(user="And what is that result multiplied by 3?")
127
+
128
+ history = llm.history
129
+ assistant_msgs = [msg for msg in history if msg["role"] == "assistant"]
130
+ assert len(assistant_msgs) == 2
131
+
132
+ # both assistant turns should have reasoning attached
133
+ for msg in assistant_msgs:
134
+ assert "reasoning_content" in msg
135
+ assert isinstance(msg["reasoning_content"], str)
136
+ assert len(msg["reasoning_content"]) > 0