llm-codegen-research 2.14__tar.gz → 2.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/PKG-INFO +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/README.md +1 -1
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/defaults.py +4 -1
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/__init__.py +6 -1
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/anthropic.py +53 -7
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/base.py +33 -15
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/deepseek.py +14 -4
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/mistral.py +25 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/nscale.py +7 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/openai.py +7 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/protocol.py +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/together.py +27 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/generate.py +36 -1
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/PKG-INFO +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/LICENSE +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/pyproject.toml +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/setup.cfg +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/classes.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/code_data.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/javascript.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/python.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/rust.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/regexes.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/decorators.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/enums.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/json_utils.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/openai_tool.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/prompts.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/py.typed +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/scripts/test_cuda.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/timeout.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/SOURCES.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/dependency_links.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/entry_points.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/requires.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/top_level.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_enums.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_json_utils.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_llm_api.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_llm_local.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_llm_tool.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.16}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llm-codegen-research
|
|
3
|
-
Version: 2.14
|
|
3
|
+
Version: 2.16
|
|
4
4
|
Summary: Useful classes and methods for researching code-generation by LLMs.
|
|
5
5
|
Author-email: Lukas Twist <itsluketwist@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
|
|
@@ -158,7 +158,7 @@ uv add openai
|
|
|
158
158
|
Or to upgrade dependencies:
|
|
159
159
|
|
|
160
160
|
```shell
|
|
161
|
-
uv sync --upgrade
|
|
161
|
+
uv sync --extra api --upgrade
|
|
162
162
|
```
|
|
163
163
|
|
|
164
164
|
Check typings with `ty`:
|
|
@@ -7,4 +7,7 @@ DEFAULT_MODEL = "gpt-4.1-mini-2025-04-14"
|
|
|
7
7
|
DEFAULT_CODEBLOCK_LANGUAGE = "python"
|
|
8
8
|
|
|
9
9
|
# the default max_tokens to be used when prompting models
|
|
10
|
-
DEFAULT_MAX_TOKENS =
|
|
10
|
+
DEFAULT_MAX_TOKENS = 4096
|
|
11
|
+
|
|
12
|
+
# default token budget for anthropic extended thinking (minimum allowed is 1024)
|
|
13
|
+
DEFAULT_THINKING_BUDGET = 2048
|
|
@@ -33,6 +33,7 @@ def get_llm(
|
|
|
33
33
|
top_p: float | None = None,
|
|
34
34
|
max_tokens: int | None = None,
|
|
35
35
|
provider: str | None = None,
|
|
36
|
+
enable_reasoning: bool = False,
|
|
36
37
|
tools: list[Tool] | None = None,
|
|
37
38
|
max_tool_iterations: int = MAX_TOOL_ITERATIONS,
|
|
38
39
|
max_tool_calls: int = MAX_TOOL_CALLS,
|
|
@@ -41,7 +42,8 @@ def get_llm(
|
|
|
41
42
|
Initialise the correct LLM client for the given model.
|
|
42
43
|
|
|
43
44
|
If tools are provided, returns an OpenAI_Tool_LLM instance. Tool calls
|
|
44
|
-
are currently only supported for OpenAI models.
|
|
45
|
+
are currently only supported for OpenAI models. enable_reasoning is only
|
|
46
|
+
supported by Anthropic, DeepSeek, Mistral, and TogetherAI models.
|
|
45
47
|
"""
|
|
46
48
|
llm_class: type[Base_LLM]
|
|
47
49
|
if provider is not None:
|
|
@@ -63,6 +65,8 @@ def get_llm(
|
|
|
63
65
|
raise NotImplementedError(
|
|
64
66
|
"Tool calls are only supported for OpenAI models."
|
|
65
67
|
)
|
|
68
|
+
if enable_reasoning:
|
|
69
|
+
raise ValueError("OpenAI_Tool_LLM does not support enable_reasoning.")
|
|
66
70
|
return OpenAI_Tool_LLM(
|
|
67
71
|
tools=tools,
|
|
68
72
|
model=model,
|
|
@@ -80,6 +84,7 @@ def get_llm(
|
|
|
80
84
|
temperature=temperature,
|
|
81
85
|
top_p=top_p,
|
|
82
86
|
max_tokens=max_tokens,
|
|
87
|
+
enable_reasoning=enable_reasoning,
|
|
83
88
|
)
|
|
84
89
|
|
|
85
90
|
|
{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/anthropic.py
RENAMED
|
@@ -3,9 +3,14 @@
|
|
|
3
3
|
from typing import Any, cast
|
|
4
4
|
|
|
5
5
|
import anthropic
|
|
6
|
-
from anthropic.types import MessageParam, TextBlock
|
|
6
|
+
from anthropic.types import (
|
|
7
|
+
MessageParam,
|
|
8
|
+
TextBlock,
|
|
9
|
+
ThinkingBlock,
|
|
10
|
+
ThinkingConfigEnabledParam,
|
|
11
|
+
)
|
|
7
12
|
|
|
8
|
-
from llm_cgr.defaults import DEFAULT_MAX_TOKENS
|
|
13
|
+
from llm_cgr.defaults import DEFAULT_MAX_TOKENS, DEFAULT_THINKING_BUDGET
|
|
9
14
|
from llm_cgr.llm.clients.base import Base_LLM
|
|
10
15
|
|
|
11
16
|
|
|
@@ -19,11 +24,14 @@ class Anthropic_LLM(Base_LLM):
|
|
|
19
24
|
temperature: float | None = None,
|
|
20
25
|
top_p: float | None = None,
|
|
21
26
|
max_tokens: int | None = None,
|
|
27
|
+
enable_reasoning: bool = False,
|
|
22
28
|
) -> None:
|
|
23
29
|
"""
|
|
24
30
|
Initialise the Anthropic client.
|
|
25
31
|
|
|
26
32
|
Requires the ANTHROPIC_API_KEY environment variable to be set.
|
|
33
|
+
Set enable_reasoning=True to enable extended thinking on supported models
|
|
34
|
+
(e.g. claude-sonnet-4-5).
|
|
27
35
|
"""
|
|
28
36
|
super().__init__(
|
|
29
37
|
model=model,
|
|
@@ -31,6 +39,7 @@ class Anthropic_LLM(Base_LLM):
|
|
|
31
39
|
temperature=temperature,
|
|
32
40
|
top_p=top_p,
|
|
33
41
|
max_tokens=max_tokens,
|
|
42
|
+
enable_reasoning=enable_reasoning,
|
|
34
43
|
)
|
|
35
44
|
self._client = anthropic.Anthropic()
|
|
36
45
|
|
|
@@ -66,15 +75,52 @@ class Anthropic_LLM(Base_LLM):
|
|
|
66
75
|
temperature: float | None = None,
|
|
67
76
|
top_p: float | None = None,
|
|
68
77
|
max_tokens: int | None = None,
|
|
69
|
-
) -> str:
|
|
78
|
+
) -> tuple[str, str | None]:
|
|
70
79
|
"""Generate a model response from the Anthropic API."""
|
|
80
|
+
# extended thinking is incompatible with custom temperature/top_p
|
|
81
|
+
thinking = (
|
|
82
|
+
ThinkingConfigEnabledParam(
|
|
83
|
+
type="enabled",
|
|
84
|
+
budget_tokens=DEFAULT_THINKING_BUDGET,
|
|
85
|
+
)
|
|
86
|
+
if self._enable_reasoning
|
|
87
|
+
else anthropic.omit
|
|
88
|
+
)
|
|
89
|
+
# custom temperature/top_p are not supported alongside extended thinking,
|
|
90
|
+
# and the api rejects requests that set both temperature and top_p
|
|
91
|
+
_temperature = (
|
|
92
|
+
temperature
|
|
93
|
+
if temperature is not None and not self._enable_reasoning
|
|
94
|
+
else anthropic.omit
|
|
95
|
+
)
|
|
96
|
+
_top_p = (
|
|
97
|
+
top_p
|
|
98
|
+
if top_p is not None
|
|
99
|
+
and not self._enable_reasoning
|
|
100
|
+
and _temperature is anthropic.omit
|
|
101
|
+
else anthropic.omit
|
|
102
|
+
)
|
|
103
|
+
|
|
71
104
|
response = self._client.messages.create(
|
|
72
105
|
model=model,
|
|
73
106
|
system=system or self._system or anthropic.omit,
|
|
74
107
|
messages=cast(list[MessageParam], input),
|
|
75
|
-
temperature=
|
|
76
|
-
top_p=
|
|
108
|
+
temperature=_temperature,
|
|
109
|
+
top_p=_top_p,
|
|
77
110
|
max_tokens=max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS,
|
|
111
|
+
thinking=thinking,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
# collect chain-of-thought from any thinking blocks; None if not present
|
|
115
|
+
thinking_blocks = [
|
|
116
|
+
block.thinking
|
|
117
|
+
for block in response.content
|
|
118
|
+
if isinstance(block, ThinkingBlock)
|
|
119
|
+
]
|
|
120
|
+
reasoning = "\n".join(thinking_blocks) if thinking_blocks else None
|
|
121
|
+
|
|
122
|
+
# the final answer is always returned as a text block
|
|
123
|
+
text_block = next(
|
|
124
|
+
block for block in response.content if isinstance(block, TextBlock)
|
|
78
125
|
)
|
|
79
|
-
|
|
80
|
-
return cast(TextBlock, response.content[0]).text
|
|
126
|
+
return text_block.text, reasoning
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Base class for LLM API clients."""
|
|
2
|
+
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
4
|
from typing import Any
|
|
3
5
|
|
|
@@ -12,9 +14,13 @@ class Base_LLM(ABC):
|
|
|
12
14
|
temperature: float | None = None,
|
|
13
15
|
top_p: float | None = None,
|
|
14
16
|
max_tokens: int | None = None,
|
|
17
|
+
enable_reasoning: bool = False,
|
|
15
18
|
) -> None:
|
|
16
19
|
"""
|
|
17
20
|
Initialise the LLM client.
|
|
21
|
+
|
|
22
|
+
When enable_reasoning is True, generate() and chat() include chain-of-thought
|
|
23
|
+
alongside responses, and reasoning is stored in the chat history.
|
|
18
24
|
"""
|
|
19
25
|
self._model = model
|
|
20
26
|
self._system = system
|
|
@@ -24,6 +30,7 @@ class Base_LLM(ABC):
|
|
|
24
30
|
self._top_p = top_p
|
|
25
31
|
self._max_tokens = max_tokens
|
|
26
32
|
|
|
33
|
+
self._enable_reasoning = enable_reasoning
|
|
27
34
|
self._history: list[dict[str, Any]] | None = None
|
|
28
35
|
|
|
29
36
|
def generate(
|
|
@@ -35,9 +42,12 @@ class Base_LLM(ABC):
|
|
|
35
42
|
temperature: float | None = None,
|
|
36
43
|
top_p: float | None = None,
|
|
37
44
|
max_tokens: int | None = None,
|
|
38
|
-
) -> list[str]:
|
|
45
|
+
) -> list[str] | list[tuple[str, str | None]]:
|
|
39
46
|
"""
|
|
40
47
|
Generate model responses from the LLMs API.
|
|
48
|
+
|
|
49
|
+
When enable_reasoning is True, returns a list of (response, reasoning) tuples.
|
|
50
|
+
When False, returns a list of response strings.
|
|
41
51
|
"""
|
|
42
52
|
_model = model or self._model
|
|
43
53
|
if _model is None:
|
|
@@ -48,16 +58,19 @@ class Base_LLM(ABC):
|
|
|
48
58
|
system=system or self._system,
|
|
49
59
|
)
|
|
50
60
|
|
|
51
|
-
_generations = []
|
|
61
|
+
_generations: list[Any] = []
|
|
52
62
|
for _ in range(samples):
|
|
53
|
-
response = self._get_response(
|
|
63
|
+
response, reasoning = self._get_response(
|
|
54
64
|
input=messages,
|
|
55
65
|
model=_model,
|
|
56
66
|
temperature=temperature or self._temperature,
|
|
57
67
|
top_p=top_p or self._top_p,
|
|
58
68
|
max_tokens=max_tokens or self._max_tokens,
|
|
59
69
|
)
|
|
60
|
-
|
|
70
|
+
if self._enable_reasoning:
|
|
71
|
+
_generations.append((response, reasoning))
|
|
72
|
+
else:
|
|
73
|
+
_generations.append(response)
|
|
61
74
|
|
|
62
75
|
return _generations
|
|
63
76
|
|
|
@@ -69,9 +82,12 @@ class Base_LLM(ABC):
|
|
|
69
82
|
temperature: float | None = None,
|
|
70
83
|
top_p: float | None = None,
|
|
71
84
|
max_tokens: int | None = None,
|
|
72
|
-
) -> str:
|
|
85
|
+
) -> str | tuple[str, str | None]:
|
|
73
86
|
"""
|
|
74
87
|
Generate a model response from the LLMs API, in the ongoing chat.
|
|
88
|
+
|
|
89
|
+
When enable_reasoning is True, reasoning is stored in the history and the
|
|
90
|
+
return value is a (response, reasoning) tuple instead of a plain string.
|
|
75
91
|
"""
|
|
76
92
|
_model = model or self._model
|
|
77
93
|
if _model is None:
|
|
@@ -92,7 +108,7 @@ class Base_LLM(ABC):
|
|
|
92
108
|
)
|
|
93
109
|
)
|
|
94
110
|
|
|
95
|
-
response = self._get_response(
|
|
111
|
+
response, reasoning = self._get_response(
|
|
96
112
|
input=self._history,
|
|
97
113
|
system=system,
|
|
98
114
|
model=_model,
|
|
@@ -101,13 +117,14 @@ class Base_LLM(ABC):
|
|
|
101
117
|
max_tokens=max_tokens or self._max_tokens,
|
|
102
118
|
)
|
|
103
119
|
|
|
104
|
-
#
|
|
105
|
-
self.
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
120
|
+
# build the assistant history entry, attaching reasoning if present
|
|
121
|
+
assistant_message = self._build_message(role="assistant", content=response)
|
|
122
|
+
if self._enable_reasoning and reasoning is not None:
|
|
123
|
+
assistant_message["reasoning_content"] = reasoning
|
|
124
|
+
self._history.append(assistant_message)
|
|
125
|
+
|
|
126
|
+
if self._enable_reasoning:
|
|
127
|
+
return response, reasoning
|
|
111
128
|
return response
|
|
112
129
|
|
|
113
130
|
@property
|
|
@@ -146,9 +163,10 @@ class Base_LLM(ABC):
|
|
|
146
163
|
temperature: float | None = None,
|
|
147
164
|
top_p: float | None = None,
|
|
148
165
|
max_tokens: int | None = None,
|
|
149
|
-
) -> str:
|
|
166
|
+
) -> tuple[str, str | None]:
|
|
150
167
|
"""
|
|
151
168
|
Generate a model response from the LLM API.
|
|
152
169
|
|
|
153
|
-
Returns
|
|
170
|
+
Returns a (response, reasoning) tuple; reasoning is None for models that
|
|
171
|
+
do not produce chain-of-thought output.
|
|
154
172
|
"""
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Class to access LLMs via the
|
|
1
|
+
"""Class to access LLMs via the DeepSeek API."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
from typing import Any, cast
|
|
@@ -19,11 +19,13 @@ class DeepSeek_LLM(Base_LLM):
|
|
|
19
19
|
temperature: float | None = None,
|
|
20
20
|
top_p: float | None = None,
|
|
21
21
|
max_tokens: int | None = None,
|
|
22
|
+
enable_reasoning: bool = False,
|
|
22
23
|
) -> None:
|
|
23
24
|
"""
|
|
24
25
|
Initialise the DeepSeek client.
|
|
25
26
|
|
|
26
27
|
Requires the DEEPSEEK_API_KEY environment variable to be set.
|
|
28
|
+
Set enable_reasoning=True when using a reasoning model (e.g. deepseek-reasoner).
|
|
27
29
|
"""
|
|
28
30
|
super().__init__(
|
|
29
31
|
model=model,
|
|
@@ -31,6 +33,7 @@ class DeepSeek_LLM(Base_LLM):
|
|
|
31
33
|
temperature=temperature,
|
|
32
34
|
top_p=top_p,
|
|
33
35
|
max_tokens=max_tokens,
|
|
36
|
+
enable_reasoning=enable_reasoning,
|
|
34
37
|
)
|
|
35
38
|
self._client = openai.OpenAI(
|
|
36
39
|
api_key=os.environ["DEEPSEEK_API_KEY"],
|
|
@@ -65,14 +68,21 @@ class DeepSeek_LLM(Base_LLM):
|
|
|
65
68
|
temperature: float | None = None,
|
|
66
69
|
top_p: float | None = None,
|
|
67
70
|
max_tokens: int | None = None,
|
|
68
|
-
) -> str:
|
|
69
|
-
"""Generate a model response from the
|
|
71
|
+
) -> tuple[str, str | None]:
|
|
72
|
+
"""Generate a model response from the DeepSeek API."""
|
|
70
73
|
response = self._client.chat.completions.create(
|
|
71
74
|
messages=cast(list[ChatCompletionMessageParam], input),
|
|
72
75
|
model=model,
|
|
73
76
|
temperature=temperature if temperature is not None else openai.omit,
|
|
74
77
|
top_p=top_p if top_p is not None else openai.omit,
|
|
75
78
|
max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
|
|
79
|
+
reasoning_effort="high",
|
|
80
|
+
extra_body={"thinking": {"type": "enabled"}},
|
|
76
81
|
)
|
|
82
|
+
message = response.choices[0].message
|
|
83
|
+
|
|
84
|
+
# chain-of-thought from reasoning models (e.g. deepseek-reasoner); None otherwise
|
|
85
|
+
reasoning = getattr(message, "reasoning_content", None)
|
|
86
|
+
|
|
77
87
|
# cast to str as text completions always return string content
|
|
78
|
-
return cast(str,
|
|
88
|
+
return cast(str, message.content), reasoning
|
|
@@ -4,6 +4,7 @@ import os
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
from mistralai import client
|
|
7
|
+
from mistralai.client.models import TextChunk, ThinkChunk
|
|
7
8
|
|
|
8
9
|
from llm_cgr.llm.clients.base import Base_LLM
|
|
9
10
|
|
|
@@ -18,11 +19,14 @@ class Mistral_LLM(Base_LLM):
|
|
|
18
19
|
temperature: float | None = None,
|
|
19
20
|
top_p: float | None = None,
|
|
20
21
|
max_tokens: int | None = None,
|
|
22
|
+
enable_reasoning: bool = False,
|
|
21
23
|
) -> None:
|
|
22
24
|
"""
|
|
23
25
|
Initialise the Mistral client.
|
|
24
26
|
|
|
25
27
|
Requires the MISTRAL_API_KEY environment variable to be set.
|
|
28
|
+
Set enable_reasoning=True to request chain-of-thought from reasoning
|
|
29
|
+
models (e.g. magistral-medium-latest).
|
|
26
30
|
"""
|
|
27
31
|
super().__init__(
|
|
28
32
|
model=model,
|
|
@@ -30,6 +34,7 @@ class Mistral_LLM(Base_LLM):
|
|
|
30
34
|
temperature=temperature,
|
|
31
35
|
top_p=top_p,
|
|
32
36
|
max_tokens=max_tokens,
|
|
37
|
+
enable_reasoning=enable_reasoning,
|
|
33
38
|
)
|
|
34
39
|
self._client = client.Mistral(
|
|
35
40
|
api_key=os.environ["MISTRAL_API_KEY"],
|
|
@@ -66,7 +71,7 @@ class Mistral_LLM(Base_LLM):
|
|
|
66
71
|
temperature: float | None = None,
|
|
67
72
|
top_p: float | None = None,
|
|
68
73
|
max_tokens: int | None = None,
|
|
69
|
-
) -> str:
|
|
74
|
+
) -> tuple[str, str | None]:
|
|
70
75
|
"""Generate a model response from the MistralAI API."""
|
|
71
76
|
response = self._client.chat.complete(
|
|
72
77
|
model=model,
|
|
@@ -74,5 +79,23 @@ class Mistral_LLM(Base_LLM):
|
|
|
74
79
|
temperature=temperature if temperature is not None else client.UNSET,
|
|
75
80
|
top_p=top_p,
|
|
76
81
|
max_tokens=max_tokens if max_tokens is not None else client.UNSET,
|
|
82
|
+
reasoning_effort="high" if self._enable_reasoning else client.UNSET,
|
|
77
83
|
)
|
|
78
|
-
|
|
84
|
+
content = response.choices[0].message.content
|
|
85
|
+
|
|
86
|
+
# plain string content means no reasoning chunks were returned
|
|
87
|
+
if isinstance(content, str):
|
|
88
|
+
return content, None
|
|
89
|
+
|
|
90
|
+
# otherwise content is a list of chunks: thinking and final text
|
|
91
|
+
reasoning_parts = [
|
|
92
|
+
inner.text
|
|
93
|
+
for chunk in content
|
|
94
|
+
if isinstance(chunk, ThinkChunk)
|
|
95
|
+
for inner in chunk.thinking
|
|
96
|
+
if isinstance(inner, TextChunk)
|
|
97
|
+
]
|
|
98
|
+
text_parts = [chunk.text for chunk in content if isinstance(chunk, TextChunk)]
|
|
99
|
+
|
|
100
|
+
reasoning = "\n".join(reasoning_parts) if reasoning_parts else None
|
|
101
|
+
return "\n".join(text_parts), reasoning
|
|
@@ -19,18 +19,23 @@ class Nscale_LLM(Base_LLM):
|
|
|
19
19
|
temperature: float | None = None,
|
|
20
20
|
top_p: float | None = None,
|
|
21
21
|
max_tokens: int | None = None,
|
|
22
|
+
enable_reasoning: bool = False,
|
|
22
23
|
) -> None:
|
|
23
24
|
"""
|
|
24
25
|
Initialise the NSCALE client.
|
|
25
26
|
|
|
26
27
|
Requires the NSCALE_API_KEY environment variable to be set.
|
|
27
28
|
"""
|
|
29
|
+
if enable_reasoning:
|
|
30
|
+
raise ValueError("Nscale_LLM does not support enable_reasoning.")
|
|
31
|
+
|
|
28
32
|
super().__init__(
|
|
29
33
|
model=model,
|
|
30
34
|
system=system,
|
|
31
35
|
temperature=temperature,
|
|
32
36
|
top_p=top_p,
|
|
33
37
|
max_tokens=max_tokens,
|
|
38
|
+
enable_reasoning=enable_reasoning,
|
|
34
39
|
)
|
|
35
40
|
self._client = openai.OpenAI(
|
|
36
41
|
api_key=os.environ["NSCALE_API_KEY"],
|
|
@@ -65,7 +70,7 @@ class Nscale_LLM(Base_LLM):
|
|
|
65
70
|
temperature: float | None = None,
|
|
66
71
|
top_p: float | None = None,
|
|
67
72
|
max_tokens: int | None = None,
|
|
68
|
-
) -> str:
|
|
73
|
+
) -> tuple[str, str | None]:
|
|
69
74
|
"""Generate a model response from the OpenAI API."""
|
|
70
75
|
response = self._client.chat.completions.create(
|
|
71
76
|
messages=cast(list[ChatCompletionMessageParam], input),
|
|
@@ -75,4 +80,4 @@ class Nscale_LLM(Base_LLM):
|
|
|
75
80
|
max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
|
|
76
81
|
)
|
|
77
82
|
# cast to str as text completions always return string content
|
|
78
|
-
return cast(str, response.choices[0].message.content)
|
|
83
|
+
return cast(str, response.choices[0].message.content), None
|
|
@@ -18,18 +18,23 @@ class OpenAI_LLM(Base_LLM):
|
|
|
18
18
|
temperature: float | None = None,
|
|
19
19
|
top_p: float | None = None,
|
|
20
20
|
max_tokens: int | None = None,
|
|
21
|
+
enable_reasoning: bool = False,
|
|
21
22
|
) -> None:
|
|
22
23
|
"""
|
|
23
24
|
Initialise the OpenAI client.
|
|
24
25
|
|
|
25
26
|
Requires the OPENAI_API_KEY environment variable to be set.
|
|
26
27
|
"""
|
|
28
|
+
if enable_reasoning:
|
|
29
|
+
raise ValueError("OpenAI_LLM does not support enable_reasoning.")
|
|
30
|
+
|
|
27
31
|
super().__init__(
|
|
28
32
|
model=model,
|
|
29
33
|
system=system,
|
|
30
34
|
temperature=temperature,
|
|
31
35
|
top_p=top_p,
|
|
32
36
|
max_tokens=max_tokens,
|
|
37
|
+
enable_reasoning=enable_reasoning,
|
|
33
38
|
)
|
|
34
39
|
self._client = openai.OpenAI()
|
|
35
40
|
|
|
@@ -61,7 +66,7 @@ class OpenAI_LLM(Base_LLM):
|
|
|
61
66
|
temperature: int | float | None = None,
|
|
62
67
|
top_p: int | float | None = None,
|
|
63
68
|
max_tokens: int | None = None,
|
|
64
|
-
) -> str:
|
|
69
|
+
) -> tuple[str, str | None]:
|
|
65
70
|
"""Generate a model response from the OpenAI API."""
|
|
66
71
|
self._client.responses.input_items
|
|
67
72
|
response = self._client.responses.create(
|
|
@@ -71,4 +76,4 @@ class OpenAI_LLM(Base_LLM):
|
|
|
71
76
|
top_p=top_p if top_p is not None else openai.omit,
|
|
72
77
|
max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
|
|
73
78
|
)
|
|
74
|
-
return response.output_text
|
|
79
|
+
return response.output_text, None
|
|
@@ -17,7 +17,7 @@ class GenerationProtocol(Protocol):
|
|
|
17
17
|
temperature: float | None = None,
|
|
18
18
|
top_p: float | None = None,
|
|
19
19
|
max_tokens: int | None = None,
|
|
20
|
-
) -> list[str]:
|
|
20
|
+
) -> list[str] | list[tuple[str, str | None]]:
|
|
21
21
|
"""
|
|
22
22
|
Generate model responses from the LLMs API.
|
|
23
23
|
"""
|
|
@@ -30,7 +30,7 @@ class GenerationProtocol(Protocol):
|
|
|
30
30
|
temperature: float | None = None,
|
|
31
31
|
top_p: float | None = None,
|
|
32
32
|
max_tokens: int | None = None,
|
|
33
|
-
) -> str:
|
|
33
|
+
) -> str | tuple[str, str | None]:
|
|
34
34
|
"""
|
|
35
35
|
Generate a model response from the LLMs API, in the ongoing chat.
|
|
36
36
|
"""
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Class to access LLMs via the TogetherAI API."""
|
|
2
2
|
|
|
3
|
+
import re
|
|
3
4
|
from typing import Any, cast
|
|
4
5
|
|
|
5
6
|
import together
|
|
@@ -7,6 +8,11 @@ import together
|
|
|
7
8
|
from llm_cgr.llm.clients.base import Base_LLM
|
|
8
9
|
|
|
9
10
|
|
|
11
|
+
# matches a <think>...</think> block at the start of a response, used by
|
|
12
|
+
# models that embed their reasoning trace directly in the content
|
|
13
|
+
_THINK_BLOCK = re.compile(r"\A<think>(.*?)</think>\s*", re.DOTALL)
|
|
14
|
+
|
|
15
|
+
|
|
10
16
|
class TogetherAI_LLM(Base_LLM):
|
|
11
17
|
"""Class to access LLMs via the TogetherAI API."""
|
|
12
18
|
|
|
@@ -17,11 +23,13 @@ class TogetherAI_LLM(Base_LLM):
|
|
|
17
23
|
temperature: float | None = None,
|
|
18
24
|
top_p: float | None = None,
|
|
19
25
|
max_tokens: int | None = None,
|
|
26
|
+
enable_reasoning: bool = False,
|
|
20
27
|
) -> None:
|
|
21
28
|
"""
|
|
22
29
|
Initialise the TogetherAI client.
|
|
23
30
|
|
|
24
31
|
Requires the TOGETHER_API_KEY environment variable to be set.
|
|
32
|
+
Set enable_reasoning=True when using a reasoning model (e.g. deepseek-ai/DeepSeek-R1).
|
|
25
33
|
"""
|
|
26
34
|
super().__init__(
|
|
27
35
|
model=model,
|
|
@@ -29,6 +37,7 @@ class TogetherAI_LLM(Base_LLM):
|
|
|
29
37
|
temperature=temperature,
|
|
30
38
|
top_p=top_p,
|
|
31
39
|
max_tokens=max_tokens,
|
|
40
|
+
enable_reasoning=enable_reasoning,
|
|
32
41
|
)
|
|
33
42
|
self._client = together.Together()
|
|
34
43
|
|
|
@@ -60,7 +69,7 @@ class TogetherAI_LLM(Base_LLM):
|
|
|
60
69
|
temperature: float | None = None,
|
|
61
70
|
top_p: float | None = None,
|
|
62
71
|
max_tokens: int | None = None,
|
|
63
|
-
) -> str:
|
|
72
|
+
) -> tuple[str, str | None]:
|
|
64
73
|
"""Generate a model response from the TogetherAI API."""
|
|
65
74
|
response = self._client.chat.completions.create(
|
|
66
75
|
model=model,
|
|
@@ -72,4 +81,20 @@ class TogetherAI_LLM(Base_LLM):
|
|
|
72
81
|
# cast to Any first as together doesn't publicly export the message type,
|
|
73
82
|
# then cast content to str as text completions always have it set
|
|
74
83
|
message = cast(Any, response.choices[0].message)
|
|
75
|
-
|
|
84
|
+
content = cast(str, message.content)
|
|
85
|
+
|
|
86
|
+
# chain-of-thought from reasoning models: most (e.g. DeepSeek-R1) use
|
|
87
|
+
# reasoning_content, some (e.g. Kimi-K2.6) use reasoning; None otherwise
|
|
88
|
+
reasoning = getattr(message, "reasoning_content", None) or getattr(
|
|
89
|
+
message, "reasoning", None
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# some models embed their reasoning as a <think>...</think> block at
|
|
93
|
+
# the start of content instead of a separate field; pull it out
|
|
94
|
+
if reasoning is None:
|
|
95
|
+
think_match = _THINK_BLOCK.match(content)
|
|
96
|
+
if think_match:
|
|
97
|
+
reasoning = think_match.group(1).strip()
|
|
98
|
+
content = content[think_match.end() :]
|
|
99
|
+
|
|
100
|
+
return content, reasoning
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
"""API utilities for interfacing with the generation models."""
|
|
2
2
|
|
|
3
|
+
from typing import Literal, overload
|
|
4
|
+
|
|
3
5
|
from llm_cgr.defaults import DEFAULT_MODEL
|
|
4
6
|
from llm_cgr.llm.clients import get_llm
|
|
5
7
|
from llm_cgr.llm.prompts import BOOL_SYSTEM_PROMPT, LIST_SYSTEM_PROMPT
|
|
6
8
|
|
|
7
9
|
|
|
10
|
+
@overload
|
|
8
11
|
def generate(
|
|
9
12
|
user: str,
|
|
10
13
|
model: str = DEFAULT_MODEL,
|
|
@@ -13,15 +16,47 @@ def generate(
|
|
|
13
16
|
top_p: float | None = None,
|
|
14
17
|
max_tokens: int | None = None,
|
|
15
18
|
provider: str | None = None,
|
|
19
|
+
enable_reasoning: Literal[False] = False,
|
|
16
20
|
**generate_kwargs,
|
|
17
|
-
) -> str:
|
|
21
|
+
) -> str: ...
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@overload
|
|
25
|
+
def generate(
|
|
26
|
+
user: str,
|
|
27
|
+
model: str = DEFAULT_MODEL,
|
|
28
|
+
system: str | None = None,
|
|
29
|
+
temperature: float | None = None,
|
|
30
|
+
top_p: float | None = None,
|
|
31
|
+
max_tokens: int | None = None,
|
|
32
|
+
provider: str | None = None,
|
|
33
|
+
enable_reasoning: Literal[True] = True,
|
|
34
|
+
**generate_kwargs,
|
|
35
|
+
) -> tuple[str, str | None]: ...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def generate(
|
|
39
|
+
user: str,
|
|
40
|
+
model: str = DEFAULT_MODEL,
|
|
41
|
+
system: str | None = None,
|
|
42
|
+
temperature: float | None = None,
|
|
43
|
+
top_p: float | None = None,
|
|
44
|
+
max_tokens: int | None = None,
|
|
45
|
+
provider: str | None = None,
|
|
46
|
+
enable_reasoning: bool = False,
|
|
47
|
+
**generate_kwargs,
|
|
48
|
+
) -> str | tuple[str, str | None]:
|
|
18
49
|
"""
|
|
19
50
|
Simple function to quickly prompt a model for a response.
|
|
51
|
+
|
|
52
|
+
When enable_reasoning is True, returns a (response, reasoning) tuple instead
|
|
53
|
+
of a plain string.
|
|
20
54
|
"""
|
|
21
55
|
client = get_llm(
|
|
22
56
|
model=model,
|
|
23
57
|
system=system,
|
|
24
58
|
provider=provider,
|
|
59
|
+
enable_reasoning=enable_reasoning,
|
|
25
60
|
)
|
|
26
61
|
[result] = client.generate(
|
|
27
62
|
user=user,
|
{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llm-codegen-research
|
|
3
|
-
Version: 2.14
|
|
3
|
+
Version: 2.16
|
|
4
4
|
Summary: Useful classes and methods for researching code-generation by LLMs.
|
|
5
5
|
Author-email: Lukas Twist <itsluketwist@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
|
|
@@ -158,7 +158,7 @@ uv add openai
|
|
|
158
158
|
Or to upgrade dependencies:
|
|
159
159
|
|
|
160
160
|
```shell
|
|
161
|
-
uv sync --upgrade
|
|
161
|
+
uv sync --extra api --upgrade
|
|
162
162
|
```
|
|
163
163
|
|
|
164
164
|
Check typings with `ty`:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/__init__.py
RENAMED
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/code_data.py
RENAMED
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/javascript.py
RENAMED
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/python.py
RENAMED
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/rust.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/openai_tool.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|