llm-codegen-research 2.14__tar.gz → 2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/PKG-INFO +2 -2
  2. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/README.md +1 -1
  3. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/defaults.py +4 -1
  4. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/__init__.py +6 -1
  5. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/anthropic.py +53 -7
  6. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/base.py +33 -15
  7. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/deepseek.py +14 -4
  8. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/mistral.py +25 -2
  9. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/nscale.py +7 -2
  10. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/openai.py +7 -2
  11. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/protocol.py +2 -2
  12. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/together.py +27 -2
  13. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/generate.py +36 -1
  14. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/PKG-INFO +2 -2
  15. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/LICENSE +0 -0
  16. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/pyproject.toml +0 -0
  17. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/setup.cfg +0 -0
  18. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/__init__.py +0 -0
  19. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/__init__.py +0 -0
  20. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/classes.py +0 -0
  21. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/__init__.py +0 -0
  22. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/code_data.py +0 -0
  23. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/javascript.py +0 -0
  24. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/python.py +0 -0
  25. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/rust.py +0 -0
  26. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/regexes.py +0 -0
  27. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/decorators.py +0 -0
  28. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/enums.py +0 -0
  29. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/json_utils.py +0 -0
  30. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/__init__.py +0 -0
  31. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/openai_tool.py +0 -0
  32. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/prompts.py +0 -0
  33. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/py.typed +0 -0
  34. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/scripts/test_cuda.py +0 -0
  35. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/timeout.py +0 -0
  36. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/SOURCES.txt +0 -0
  37. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/dependency_links.txt +0 -0
  38. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/entry_points.txt +0 -0
  39. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/requires.txt +0 -0
  40. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/top_level.txt +0 -0
  41. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_enums.py +0 -0
  42. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_json_utils.py +0 -0
  43. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_llm_api.py +0 -0
  44. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_llm_local.py +0 -0
  45. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_llm_tool.py +0 -0
  46. {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-codegen-research
3
- Version: 2.14
3
+ Version: 2.16
4
4
  Summary: Useful classes and methods for researching code-generation by LLMs.
5
5
  Author-email: Lukas Twist <itsluketwist@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -158,7 +158,7 @@ uv add openai
158
158
  Or to upgrade dependencies:
159
159
 
160
160
  ```shell
161
- uv sync --upgrade
161
+ uv sync --extra api --upgrade
162
162
  ```
163
163
 
164
164
  Check typings with `ty`:
@@ -138,7 +138,7 @@ uv add openai
138
138
  Or to upgrade dependencies:
139
139
 
140
140
  ```shell
141
- uv sync --upgrade
141
+ uv sync --extra api --upgrade
142
142
  ```
143
143
 
144
144
  Check typings with `ty`:
@@ -7,4 +7,7 @@ DEFAULT_MODEL = "gpt-4.1-mini-2025-04-14"
7
7
  DEFAULT_CODEBLOCK_LANGUAGE = "python"
8
8
 
9
9
  # the default max_tokens to be used when prompting models
10
- DEFAULT_MAX_TOKENS = 2000
10
+ DEFAULT_MAX_TOKENS = 4096
11
+
12
+ # default token budget for anthropic extended thinking (minimum allowed is 1024)
13
+ DEFAULT_THINKING_BUDGET = 2048
@@ -33,6 +33,7 @@ def get_llm(
33
33
  top_p: float | None = None,
34
34
  max_tokens: int | None = None,
35
35
  provider: str | None = None,
36
+ enable_reasoning: bool = False,
36
37
  tools: list[Tool] | None = None,
37
38
  max_tool_iterations: int = MAX_TOOL_ITERATIONS,
38
39
  max_tool_calls: int = MAX_TOOL_CALLS,
@@ -41,7 +42,8 @@ def get_llm(
41
42
  Initialise the correct LLM client for the given model.
42
43
 
43
44
  If tools are provided, returns an OpenAI_Tool_LLM instance. Tool calls
44
- are currently only supported for OpenAI models.
45
+ are currently only supported for OpenAI models. enable_reasoning is only
46
+ supported by Anthropic, DeepSeek, Mistral, and TogetherAI models.
45
47
  """
46
48
  llm_class: type[Base_LLM]
47
49
  if provider is not None:
@@ -63,6 +65,8 @@ def get_llm(
63
65
  raise NotImplementedError(
64
66
  "Tool calls are only supported for OpenAI models."
65
67
  )
68
+ if enable_reasoning:
69
+ raise ValueError("OpenAI_Tool_LLM does not support enable_reasoning.")
66
70
  return OpenAI_Tool_LLM(
67
71
  tools=tools,
68
72
  model=model,
@@ -80,6 +84,7 @@ def get_llm(
80
84
  temperature=temperature,
81
85
  top_p=top_p,
82
86
  max_tokens=max_tokens,
87
+ enable_reasoning=enable_reasoning,
83
88
  )
84
89
 
85
90
 
@@ -3,9 +3,14 @@
3
3
  from typing import Any, cast
4
4
 
5
5
  import anthropic
6
- from anthropic.types import MessageParam, TextBlock
6
+ from anthropic.types import (
7
+ MessageParam,
8
+ TextBlock,
9
+ ThinkingBlock,
10
+ ThinkingConfigEnabledParam,
11
+ )
7
12
 
8
- from llm_cgr.defaults import DEFAULT_MAX_TOKENS
13
+ from llm_cgr.defaults import DEFAULT_MAX_TOKENS, DEFAULT_THINKING_BUDGET
9
14
  from llm_cgr.llm.clients.base import Base_LLM
10
15
 
11
16
 
@@ -19,11 +24,14 @@ class Anthropic_LLM(Base_LLM):
19
24
  temperature: float | None = None,
20
25
  top_p: float | None = None,
21
26
  max_tokens: int | None = None,
27
+ enable_reasoning: bool = False,
22
28
  ) -> None:
23
29
  """
24
30
  Initialise the Anthropic client.
25
31
 
26
32
  Requires the ANTHROPIC_API_KEY environment variable to be set.
33
+ Set enable_reasoning=True to enable extended thinking on supported models
34
+ (e.g. claude-sonnet-4-5).
27
35
  """
28
36
  super().__init__(
29
37
  model=model,
@@ -31,6 +39,7 @@ class Anthropic_LLM(Base_LLM):
31
39
  temperature=temperature,
32
40
  top_p=top_p,
33
41
  max_tokens=max_tokens,
42
+ enable_reasoning=enable_reasoning,
34
43
  )
35
44
  self._client = anthropic.Anthropic()
36
45
 
@@ -66,15 +75,52 @@ class Anthropic_LLM(Base_LLM):
66
75
  temperature: float | None = None,
67
76
  top_p: float | None = None,
68
77
  max_tokens: int | None = None,
69
- ) -> str:
78
+ ) -> tuple[str, str | None]:
70
79
  """Generate a model response from the Anthropic API."""
80
+ # request extended thinking (with the default token budget) only when reasoning is enabled
81
+ thinking = (
82
+ ThinkingConfigEnabledParam(
83
+ type="enabled",
84
+ budget_tokens=DEFAULT_THINKING_BUDGET,
85
+ )
86
+ if self._enable_reasoning
87
+ else anthropic.omit
88
+ )
89
+ # custom temperature/top_p are not supported alongside extended thinking,
90
+ # and the api rejects requests that set both temperature and top_p
91
+ _temperature = (
92
+ temperature
93
+ if temperature is not None and not self._enable_reasoning
94
+ else anthropic.omit
95
+ )
96
+ _top_p = (
97
+ top_p
98
+ if top_p is not None
99
+ and not self._enable_reasoning
100
+ and _temperature is anthropic.omit
101
+ else anthropic.omit
102
+ )
103
+
71
104
  response = self._client.messages.create(
72
105
  model=model,
73
106
  system=system or self._system or anthropic.omit,
74
107
  messages=cast(list[MessageParam], input),
75
- temperature=temperature if temperature is not None else anthropic.omit,
76
- top_p=top_p if top_p is not None else anthropic.omit,
108
+ temperature=_temperature,
109
+ top_p=_top_p,
77
110
  max_tokens=max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS,
111
+ thinking=thinking,
112
+ )
113
+
114
+ # collect chain-of-thought from any thinking blocks; None if not present
115
+ thinking_blocks = [
116
+ block.thinking
117
+ for block in response.content
118
+ if isinstance(block, ThinkingBlock)
119
+ ]
120
+ reasoning = "\n".join(thinking_blocks) if thinking_blocks else None
121
+
122
+ # the final answer is always returned as a text block
123
+ text_block = next(
124
+ block for block in response.content if isinstance(block, TextBlock)
78
125
  )
79
- # cast to TextBlock as non-tool, non-thinking requests always return text
80
- return cast(TextBlock, response.content[0]).text
126
+ return text_block.text, reasoning
@@ -1,3 +1,5 @@
1
+ """Base class for LLM API clients."""
2
+
1
3
  from abc import ABC, abstractmethod
2
4
  from typing import Any
3
5
 
@@ -12,9 +14,13 @@ class Base_LLM(ABC):
12
14
  temperature: float | None = None,
13
15
  top_p: float | None = None,
14
16
  max_tokens: int | None = None,
17
+ enable_reasoning: bool = False,
15
18
  ) -> None:
16
19
  """
17
20
  Initialise the LLM client.
21
+
22
+ When enable_reasoning is True, generate() and chat() include chain-of-thought
23
+ alongside responses, and reasoning is stored in the chat history.
18
24
  """
19
25
  self._model = model
20
26
  self._system = system
@@ -24,6 +30,7 @@ class Base_LLM(ABC):
24
30
  self._top_p = top_p
25
31
  self._max_tokens = max_tokens
26
32
 
33
+ self._enable_reasoning = enable_reasoning
27
34
  self._history: list[dict[str, Any]] | None = None
28
35
 
29
36
  def generate(
@@ -35,9 +42,12 @@ class Base_LLM(ABC):
35
42
  temperature: float | None = None,
36
43
  top_p: float | None = None,
37
44
  max_tokens: int | None = None,
38
- ) -> list[str]:
45
+ ) -> list[str] | list[tuple[str, str | None]]:
39
46
  """
40
47
  Generate model responses from the LLMs API.
48
+
49
+ When enable_reasoning is True, returns a list of (response, reasoning) tuples.
50
+ When False, returns a list of response strings.
41
51
  """
42
52
  _model = model or self._model
43
53
  if _model is None:
@@ -48,16 +58,19 @@ class Base_LLM(ABC):
48
58
  system=system or self._system,
49
59
  )
50
60
 
51
- _generations = []
61
+ _generations: list[Any] = []
52
62
  for _ in range(samples):
53
- response = self._get_response(
63
+ response, reasoning = self._get_response(
54
64
  input=messages,
55
65
  model=_model,
56
66
  temperature=temperature or self._temperature,
57
67
  top_p=top_p or self._top_p,
58
68
  max_tokens=max_tokens or self._max_tokens,
59
69
  )
60
- _generations.append(response)
70
+ if self._enable_reasoning:
71
+ _generations.append((response, reasoning))
72
+ else:
73
+ _generations.append(response)
61
74
 
62
75
  return _generations
63
76
 
@@ -69,9 +82,12 @@ class Base_LLM(ABC):
69
82
  temperature: float | None = None,
70
83
  top_p: float | None = None,
71
84
  max_tokens: int | None = None,
72
- ) -> str:
85
+ ) -> str | tuple[str, str | None]:
73
86
  """
74
87
  Generate a model response from the LLMs API, in the ongoing chat.
88
+
89
+ When enable_reasoning is True, reasoning is stored in the history and the
90
+ return value is a (response, reasoning) tuple instead of a plain string.
75
91
  """
76
92
  _model = model or self._model
77
93
  if _model is None:
@@ -92,7 +108,7 @@ class Base_LLM(ABC):
92
108
  )
93
109
  )
94
110
 
95
- response = self._get_response(
111
+ response, reasoning = self._get_response(
96
112
  input=self._history,
97
113
  system=system,
98
114
  model=_model,
@@ -101,13 +117,14 @@ class Base_LLM(ABC):
101
117
  max_tokens=max_tokens or self._max_tokens,
102
118
  )
103
119
 
104
- # update the history and return
105
- self._history.append(
106
- self._build_message(
107
- role="assistant",
108
- content=response,
109
- )
110
- )
120
+ # build the assistant history entry, attaching reasoning if present
121
+ assistant_message = self._build_message(role="assistant", content=response)
122
+ if self._enable_reasoning and reasoning is not None:
123
+ assistant_message["reasoning_content"] = reasoning
124
+ self._history.append(assistant_message)
125
+
126
+ if self._enable_reasoning:
127
+ return response, reasoning
111
128
  return response
112
129
 
113
130
  @property
@@ -146,9 +163,10 @@ class Base_LLM(ABC):
146
163
  temperature: float | None = None,
147
164
  top_p: float | None = None,
148
165
  max_tokens: int | None = None,
149
- ) -> str:
166
+ ) -> tuple[str, str | None]:
150
167
  """
151
168
  Generate a model response from the LLM API.
152
169
 
153
- Returns the text response to the prompt.
170
+ Returns a (response, reasoning) tuple; reasoning is None for models that
171
+ do not produce chain-of-thought output.
154
172
  """
@@ -1,4 +1,4 @@
1
- """Class to access LLMs via the OpenAI API."""
1
+ """Class to access LLMs via the DeepSeek API."""
2
2
 
3
3
  import os
4
4
  from typing import Any, cast
@@ -19,11 +19,13 @@ class DeepSeek_LLM(Base_LLM):
19
19
  temperature: float | None = None,
20
20
  top_p: float | None = None,
21
21
  max_tokens: int | None = None,
22
+ enable_reasoning: bool = False,
22
23
  ) -> None:
23
24
  """
24
25
  Initialise the DeepSeek client.
25
26
 
26
27
  Requires the DEEPSEEK_API_KEY environment variable to be set.
28
+ Set enable_reasoning=True when using a reasoning model (e.g. deepseek-reasoner).
27
29
  """
28
30
  super().__init__(
29
31
  model=model,
@@ -31,6 +33,7 @@ class DeepSeek_LLM(Base_LLM):
31
33
  temperature=temperature,
32
34
  top_p=top_p,
33
35
  max_tokens=max_tokens,
36
+ enable_reasoning=enable_reasoning,
34
37
  )
35
38
  self._client = openai.OpenAI(
36
39
  api_key=os.environ["DEEPSEEK_API_KEY"],
@@ -65,14 +68,21 @@ class DeepSeek_LLM(Base_LLM):
65
68
  temperature: float | None = None,
66
69
  top_p: float | None = None,
67
70
  max_tokens: int | None = None,
68
- ) -> str:
69
- """Generate a model response from the OpenAI API."""
71
+ ) -> tuple[str, str | None]:
72
+ """Generate a model response from the DeepSeek API."""
70
73
  response = self._client.chat.completions.create(
71
74
  messages=cast(list[ChatCompletionMessageParam], input),
72
75
  model=model,
73
76
  temperature=temperature if temperature is not None else openai.omit,
74
77
  top_p=top_p if top_p is not None else openai.omit,
75
78
  max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
79
+ reasoning_effort="high",
80
+ extra_body={"thinking": {"type": "enabled"}},
76
81
  )
82
+ message = response.choices[0].message
83
+
84
+ # chain-of-thought from reasoning models (e.g. deepseek-reasoner); None otherwise
85
+ reasoning = getattr(message, "reasoning_content", None)
86
+
77
87
  # cast to str as text completions always return string content
78
- return cast(str, response.choices[0].message.content)
88
+ return cast(str, message.content), reasoning
@@ -4,6 +4,7 @@ import os
4
4
  from typing import Any
5
5
 
6
6
  from mistralai import client
7
+ from mistralai.client.models import TextChunk, ThinkChunk
7
8
 
8
9
  from llm_cgr.llm.clients.base import Base_LLM
9
10
 
@@ -18,11 +19,14 @@ class Mistral_LLM(Base_LLM):
18
19
  temperature: float | None = None,
19
20
  top_p: float | None = None,
20
21
  max_tokens: int | None = None,
22
+ enable_reasoning: bool = False,
21
23
  ) -> None:
22
24
  """
23
25
  Initialise the Mistral client.
24
26
 
25
27
  Requires the MISTRAL_API_KEY environment variable to be set.
28
+ Set enable_reasoning=True to request chain-of-thought from reasoning
29
+ models (e.g. magistral-medium-latest).
26
30
  """
27
31
  super().__init__(
28
32
  model=model,
@@ -30,6 +34,7 @@ class Mistral_LLM(Base_LLM):
30
34
  temperature=temperature,
31
35
  top_p=top_p,
32
36
  max_tokens=max_tokens,
37
+ enable_reasoning=enable_reasoning,
33
38
  )
34
39
  self._client = client.Mistral(
35
40
  api_key=os.environ["MISTRAL_API_KEY"],
@@ -66,7 +71,7 @@ class Mistral_LLM(Base_LLM):
66
71
  temperature: float | None = None,
67
72
  top_p: float | None = None,
68
73
  max_tokens: int | None = None,
69
- ) -> str:
74
+ ) -> tuple[str, str | None]:
70
75
  """Generate a model response from the MistralAI API."""
71
76
  response = self._client.chat.complete(
72
77
  model=model,
@@ -74,5 +79,23 @@ class Mistral_LLM(Base_LLM):
74
79
  temperature=temperature if temperature is not None else client.UNSET,
75
80
  top_p=top_p,
76
81
  max_tokens=max_tokens if max_tokens is not None else client.UNSET,
82
+ reasoning_effort="high" if self._enable_reasoning else client.UNSET,
77
83
  )
78
- return response.choices[0].message.content
84
+ content = response.choices[0].message.content
85
+
86
+ # plain string content means no reasoning chunks were returned
87
+ if isinstance(content, str):
88
+ return content, None
89
+
90
+ # otherwise content is a list of chunks: thinking and final text
91
+ reasoning_parts = [
92
+ inner.text
93
+ for chunk in content
94
+ if isinstance(chunk, ThinkChunk)
95
+ for inner in chunk.thinking
96
+ if isinstance(inner, TextChunk)
97
+ ]
98
+ text_parts = [chunk.text for chunk in content if isinstance(chunk, TextChunk)]
99
+
100
+ reasoning = "\n".join(reasoning_parts) if reasoning_parts else None
101
+ return "\n".join(text_parts), reasoning
@@ -19,18 +19,23 @@ class Nscale_LLM(Base_LLM):
19
19
  temperature: float | None = None,
20
20
  top_p: float | None = None,
21
21
  max_tokens: int | None = None,
22
+ enable_reasoning: bool = False,
22
23
  ) -> None:
23
24
  """
24
25
  Initialise the NSCALE client.
25
26
 
26
27
  Requires the NSCALE_API_KEY environment variable to be set.
27
28
  """
29
+ if enable_reasoning:
30
+ raise ValueError("Nscale_LLM does not support enable_reasoning.")
31
+
28
32
  super().__init__(
29
33
  model=model,
30
34
  system=system,
31
35
  temperature=temperature,
32
36
  top_p=top_p,
33
37
  max_tokens=max_tokens,
38
+ enable_reasoning=enable_reasoning,
34
39
  )
35
40
  self._client = openai.OpenAI(
36
41
  api_key=os.environ["NSCALE_API_KEY"],
@@ -65,7 +70,7 @@ class Nscale_LLM(Base_LLM):
65
70
  temperature: float | None = None,
66
71
  top_p: float | None = None,
67
72
  max_tokens: int | None = None,
68
- ) -> str:
73
+ ) -> tuple[str, str | None]:
69
74
  """Generate a model response from the OpenAI API."""
70
75
  response = self._client.chat.completions.create(
71
76
  messages=cast(list[ChatCompletionMessageParam], input),
@@ -75,4 +80,4 @@ class Nscale_LLM(Base_LLM):
75
80
  max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
76
81
  )
77
82
  # cast to str as text completions always return string content
78
- return cast(str, response.choices[0].message.content)
83
+ return cast(str, response.choices[0].message.content), None
@@ -18,18 +18,23 @@ class OpenAI_LLM(Base_LLM):
18
18
  temperature: float | None = None,
19
19
  top_p: float | None = None,
20
20
  max_tokens: int | None = None,
21
+ enable_reasoning: bool = False,
21
22
  ) -> None:
22
23
  """
23
24
  Initialise the OpenAI client.
24
25
 
25
26
  Requires the OPENAI_API_KEY environment variable to be set.
26
27
  """
28
+ if enable_reasoning:
29
+ raise ValueError("OpenAI_LLM does not support enable_reasoning.")
30
+
27
31
  super().__init__(
28
32
  model=model,
29
33
  system=system,
30
34
  temperature=temperature,
31
35
  top_p=top_p,
32
36
  max_tokens=max_tokens,
37
+ enable_reasoning=enable_reasoning,
33
38
  )
34
39
  self._client = openai.OpenAI()
35
40
 
@@ -61,7 +66,7 @@ class OpenAI_LLM(Base_LLM):
61
66
  temperature: int | float | None = None,
62
67
  top_p: int | float | None = None,
63
68
  max_tokens: int | None = None,
64
- ) -> str:
69
+ ) -> tuple[str, str | None]:
65
70
  """Generate a model response from the OpenAI API."""
66
71
  self._client.responses.input_items
67
72
  response = self._client.responses.create(
@@ -71,4 +76,4 @@ class OpenAI_LLM(Base_LLM):
71
76
  top_p=top_p if top_p is not None else openai.omit,
72
77
  max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
73
78
  )
74
- return response.output_text
79
+ return response.output_text, None
@@ -17,7 +17,7 @@ class GenerationProtocol(Protocol):
17
17
  temperature: float | None = None,
18
18
  top_p: float | None = None,
19
19
  max_tokens: int | None = None,
20
- ) -> list[str]:
20
+ ) -> list[str] | list[tuple[str, str | None]]:
21
21
  """
22
22
  Generate model responses from the LLMs API.
23
23
  """
@@ -30,7 +30,7 @@ class GenerationProtocol(Protocol):
30
30
  temperature: float | None = None,
31
31
  top_p: float | None = None,
32
32
  max_tokens: int | None = None,
33
- ) -> str:
33
+ ) -> str | tuple[str, str | None]:
34
34
  """
35
35
  Generate a model response from the LLMs API, in the ongoing chat.
36
36
  """
@@ -1,5 +1,6 @@
1
1
  """Class to access LLMs via the TogetherAI API."""
2
2
 
3
+ import re
3
4
  from typing import Any, cast
4
5
 
5
6
  import together
@@ -7,6 +8,11 @@ import together
7
8
  from llm_cgr.llm.clients.base import Base_LLM
8
9
 
9
10
 
11
+ # matches a <think>...</think> block at the start of a response, used by
12
+ # models that embed their reasoning trace directly in the content
13
+ _THINK_BLOCK = re.compile(r"\A<think>(.*?)</think>\s*", re.DOTALL)
14
+
15
+
10
16
  class TogetherAI_LLM(Base_LLM):
11
17
  """Class to access LLMs via the TogetherAI API."""
12
18
 
@@ -17,11 +23,13 @@ class TogetherAI_LLM(Base_LLM):
17
23
  temperature: float | None = None,
18
24
  top_p: float | None = None,
19
25
  max_tokens: int | None = None,
26
+ enable_reasoning: bool = False,
20
27
  ) -> None:
21
28
  """
22
29
  Initialise the TogetherAI client.
23
30
 
24
31
  Requires the TOGETHER_API_KEY environment variable to be set.
32
+ Set enable_reasoning=True when using a reasoning model (e.g. deepseek-ai/DeepSeek-R1).
25
33
  """
26
34
  super().__init__(
27
35
  model=model,
@@ -29,6 +37,7 @@ class TogetherAI_LLM(Base_LLM):
29
37
  temperature=temperature,
30
38
  top_p=top_p,
31
39
  max_tokens=max_tokens,
40
+ enable_reasoning=enable_reasoning,
32
41
  )
33
42
  self._client = together.Together()
34
43
 
@@ -60,7 +69,7 @@ class TogetherAI_LLM(Base_LLM):
60
69
  temperature: float | None = None,
61
70
  top_p: float | None = None,
62
71
  max_tokens: int | None = None,
63
- ) -> str:
72
+ ) -> tuple[str, str | None]:
64
73
  """Generate a model response from the TogetherAI API."""
65
74
  response = self._client.chat.completions.create(
66
75
  model=model,
@@ -72,4 +81,20 @@ class TogetherAI_LLM(Base_LLM):
72
81
  # cast to Any first as together doesn't publicly export the message type,
73
82
  # then cast content to str as text completions always have it set
74
83
  message = cast(Any, response.choices[0].message)
75
- return cast(str, message.content)
84
+ content = cast(str, message.content)
85
+
86
+ # chain-of-thought from reasoning models: most (e.g. DeepSeek-R1) use
87
+ # reasoning_content, some (e.g. Kimi-K2.6) use reasoning; None otherwise
88
+ reasoning = getattr(message, "reasoning_content", None) or getattr(
89
+ message, "reasoning", None
90
+ )
91
+
92
+ # some models embed their reasoning as a <think>...</think> block at
93
+ # the start of content instead of a separate field; pull it out
94
+ if reasoning is None:
95
+ think_match = _THINK_BLOCK.match(content)
96
+ if think_match:
97
+ reasoning = think_match.group(1).strip()
98
+ content = content[think_match.end() :]
99
+
100
+ return content, reasoning
@@ -1,10 +1,13 @@
1
1
  """API utilities for interfacing with the generation models."""
2
2
 
3
+ from typing import Literal, overload
4
+
3
5
  from llm_cgr.defaults import DEFAULT_MODEL
4
6
  from llm_cgr.llm.clients import get_llm
5
7
  from llm_cgr.llm.prompts import BOOL_SYSTEM_PROMPT, LIST_SYSTEM_PROMPT
6
8
 
7
9
 
10
+ @overload
8
11
  def generate(
9
12
  user: str,
10
13
  model: str = DEFAULT_MODEL,
@@ -13,15 +16,47 @@ def generate(
13
16
  top_p: float | None = None,
14
17
  max_tokens: int | None = None,
15
18
  provider: str | None = None,
19
+ enable_reasoning: Literal[False] = False,
16
20
  **generate_kwargs,
17
- ) -> str:
21
+ ) -> str: ...
22
+
23
+
24
+ @overload
25
+ def generate(
26
+ user: str,
27
+ model: str = DEFAULT_MODEL,
28
+ system: str | None = None,
29
+ temperature: float | None = None,
30
+ top_p: float | None = None,
31
+ max_tokens: int | None = None,
32
+ provider: str | None = None,
33
+ enable_reasoning: Literal[True] = True,
34
+ **generate_kwargs,
35
+ ) -> tuple[str, str | None]: ...
36
+
37
+
38
+ def generate(
39
+ user: str,
40
+ model: str = DEFAULT_MODEL,
41
+ system: str | None = None,
42
+ temperature: float | None = None,
43
+ top_p: float | None = None,
44
+ max_tokens: int | None = None,
45
+ provider: str | None = None,
46
+ enable_reasoning: bool = False,
47
+ **generate_kwargs,
48
+ ) -> str | tuple[str, str | None]:
18
49
  """
19
50
  Simple function to quickly prompt a model for a response.
51
+
52
+ When enable_reasoning is True, returns a (response, reasoning) tuple instead
53
+ of a plain string.
20
54
  """
21
55
  client = get_llm(
22
56
  model=model,
23
57
  system=system,
24
58
  provider=provider,
59
+ enable_reasoning=enable_reasoning,
25
60
  )
26
61
  [result] = client.generate(
27
62
  user=user,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-codegen-research
3
- Version: 2.14
3
+ Version: 2.16
4
4
  Summary: Useful classes and methods for researching code-generation by LLMs.
5
5
  Author-email: Lukas Twist <itsluketwist@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -158,7 +158,7 @@ uv add openai
158
158
  Or to upgrade dependencies:
159
159
 
160
160
  ```shell
161
- uv sync --upgrade
161
+ uv sync --extra api --upgrade
162
162
  ```
163
163
 
164
164
  Check typings with `ty`: