llm-codegen-research 2.14__tar.gz → 2.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/PKG-INFO +1 -1
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/anthropic.py +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/base.py +33 -15
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/deepseek.py +12 -4
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/mistral.py +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/nscale.py +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/openai.py +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/protocol.py +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/together.py +2 -2
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/generate.py +4 -1
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/PKG-INFO +1 -1
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/SOURCES.txt +1 -0
- llm_codegen_research-2.15/tests/test_llm_deepseek_reasoning.py +136 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/LICENSE +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/README.md +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/pyproject.toml +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/setup.cfg +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/classes.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/code_data.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/javascript.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/python.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/rust.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/regexes.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/decorators.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/defaults.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/enums.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/json_utils.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/__init__.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/openai_tool.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/prompts.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/py.typed +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/scripts/test_cuda.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/timeout.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/dependency_links.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/entry_points.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/requires.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/top_level.txt +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/tests/test_enums.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/tests/test_json_utils.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/tests/test_llm_api.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/tests/test_llm_local.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/tests/test_llm_tool.py +0 -0
- {llm_codegen_research-2.14 → llm_codegen_research-2.15}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llm-codegen-research
|
|
3
|
-
Version: 2.14
|
|
3
|
+
Version: 2.15
|
|
4
4
|
Summary: Useful classes and methods for researching code-generation by LLMs.
|
|
5
5
|
Author-email: Lukas Twist <itsluketwist@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
|
{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/anthropic.py
RENAMED
|
@@ -66,7 +66,7 @@ class Anthropic_LLM(Base_LLM):
|
|
|
66
66
|
temperature: float | None = None,
|
|
67
67
|
top_p: float | None = None,
|
|
68
68
|
max_tokens: int | None = None,
|
|
69
|
-
) -> str:
|
|
69
|
+
) -> tuple[str, str | None]:
|
|
70
70
|
"""Generate a model response from the Anthropic API."""
|
|
71
71
|
response = self._client.messages.create(
|
|
72
72
|
model=model,
|
|
@@ -77,4 +77,4 @@ class Anthropic_LLM(Base_LLM):
|
|
|
77
77
|
max_tokens=max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS,
|
|
78
78
|
)
|
|
79
79
|
# cast to TextBlock as non-tool, non-thinking requests always return text
|
|
80
|
-
return cast(TextBlock, response.content[0]).text
|
|
80
|
+
return cast(TextBlock, response.content[0]).text, None
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Base class for LLM API clients."""
|
|
2
|
+
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
4
|
from typing import Any
|
|
3
5
|
|
|
@@ -12,9 +14,13 @@ class Base_LLM(ABC):
|
|
|
12
14
|
temperature: float | None = None,
|
|
13
15
|
top_p: float | None = None,
|
|
14
16
|
max_tokens: int | None = None,
|
|
17
|
+
enable_reasoning: bool = False,
|
|
15
18
|
) -> None:
|
|
16
19
|
"""
|
|
17
20
|
Initialise the LLM client.
|
|
21
|
+
|
|
22
|
+
When enable_reasoning is True, generate() and chat() include chain-of-thought
|
|
23
|
+
alongside responses, and reasoning is stored in the chat history.
|
|
18
24
|
"""
|
|
19
25
|
self._model = model
|
|
20
26
|
self._system = system
|
|
@@ -24,6 +30,7 @@ class Base_LLM(ABC):
|
|
|
24
30
|
self._top_p = top_p
|
|
25
31
|
self._max_tokens = max_tokens
|
|
26
32
|
|
|
33
|
+
self._enable_reasoning = enable_reasoning
|
|
27
34
|
self._history: list[dict[str, Any]] | None = None
|
|
28
35
|
|
|
29
36
|
def generate(
|
|
@@ -35,9 +42,12 @@ class Base_LLM(ABC):
|
|
|
35
42
|
temperature: float | None = None,
|
|
36
43
|
top_p: float | None = None,
|
|
37
44
|
max_tokens: int | None = None,
|
|
38
|
-
) -> list[str]:
|
|
45
|
+
) -> list[str] | list[tuple[str, str | None]]:
|
|
39
46
|
"""
|
|
40
47
|
Generate model responses from the LLMs API.
|
|
48
|
+
|
|
49
|
+
When enable_reasoning is True, returns a list of (response, reasoning) tuples.
|
|
50
|
+
When False, returns a list of response strings.
|
|
41
51
|
"""
|
|
42
52
|
_model = model or self._model
|
|
43
53
|
if _model is None:
|
|
@@ -48,16 +58,19 @@ class Base_LLM(ABC):
|
|
|
48
58
|
system=system or self._system,
|
|
49
59
|
)
|
|
50
60
|
|
|
51
|
-
_generations = []
|
|
61
|
+
_generations: list[Any] = []
|
|
52
62
|
for _ in range(samples):
|
|
53
|
-
response = self._get_response(
|
|
63
|
+
response, reasoning = self._get_response(
|
|
54
64
|
input=messages,
|
|
55
65
|
model=_model,
|
|
56
66
|
temperature=temperature or self._temperature,
|
|
57
67
|
top_p=top_p or self._top_p,
|
|
58
68
|
max_tokens=max_tokens or self._max_tokens,
|
|
59
69
|
)
|
|
60
|
-
|
|
70
|
+
if self._enable_reasoning:
|
|
71
|
+
_generations.append((response, reasoning))
|
|
72
|
+
else:
|
|
73
|
+
_generations.append(response)
|
|
61
74
|
|
|
62
75
|
return _generations
|
|
63
76
|
|
|
@@ -69,9 +82,12 @@ class Base_LLM(ABC):
|
|
|
69
82
|
temperature: float | None = None,
|
|
70
83
|
top_p: float | None = None,
|
|
71
84
|
max_tokens: int | None = None,
|
|
72
|
-
) -> str:
|
|
85
|
+
) -> str | tuple[str, str | None]:
|
|
73
86
|
"""
|
|
74
87
|
Generate a model response from the LLMs API, in the ongoing chat.
|
|
88
|
+
|
|
89
|
+
When enable_reasoning is True, reasoning is stored in the history and the
|
|
90
|
+
return value is a (response, reasoning) tuple instead of a plain string.
|
|
75
91
|
"""
|
|
76
92
|
_model = model or self._model
|
|
77
93
|
if _model is None:
|
|
@@ -92,7 +108,7 @@ class Base_LLM(ABC):
|
|
|
92
108
|
)
|
|
93
109
|
)
|
|
94
110
|
|
|
95
|
-
response = self._get_response(
|
|
111
|
+
response, reasoning = self._get_response(
|
|
96
112
|
input=self._history,
|
|
97
113
|
system=system,
|
|
98
114
|
model=_model,
|
|
@@ -101,13 +117,14 @@ class Base_LLM(ABC):
|
|
|
101
117
|
max_tokens=max_tokens or self._max_tokens,
|
|
102
118
|
)
|
|
103
119
|
|
|
104
|
-
#
|
|
105
|
-
self.
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
120
|
+
# build the assistant history entry, attaching reasoning if present
|
|
121
|
+
assistant_message = self._build_message(role="assistant", content=response)
|
|
122
|
+
if self._enable_reasoning and reasoning is not None:
|
|
123
|
+
assistant_message["reasoning_content"] = reasoning
|
|
124
|
+
self._history.append(assistant_message)
|
|
125
|
+
|
|
126
|
+
if self._enable_reasoning:
|
|
127
|
+
return response, reasoning
|
|
111
128
|
return response
|
|
112
129
|
|
|
113
130
|
@property
|
|
@@ -146,9 +163,10 @@ class Base_LLM(ABC):
|
|
|
146
163
|
temperature: float | None = None,
|
|
147
164
|
top_p: float | None = None,
|
|
148
165
|
max_tokens: int | None = None,
|
|
149
|
-
) -> str:
|
|
166
|
+
) -> tuple[str, str | None]:
|
|
150
167
|
"""
|
|
151
168
|
Generate a model response from the LLM API.
|
|
152
169
|
|
|
153
|
-
Returns
|
|
170
|
+
Returns a (response, reasoning) tuple; reasoning is None for models that
|
|
171
|
+
do not produce chain-of-thought output.
|
|
154
172
|
"""
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Class to access LLMs via the
|
|
1
|
+
"""Class to access LLMs via the DeepSeek API."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
from typing import Any, cast
|
|
@@ -19,11 +19,13 @@ class DeepSeek_LLM(Base_LLM):
|
|
|
19
19
|
temperature: float | None = None,
|
|
20
20
|
top_p: float | None = None,
|
|
21
21
|
max_tokens: int | None = None,
|
|
22
|
+
enable_reasoning: bool = False,
|
|
22
23
|
) -> None:
|
|
23
24
|
"""
|
|
24
25
|
Initialise the DeepSeek client.
|
|
25
26
|
|
|
26
27
|
Requires the DEEPSEEK_API_KEY environment variable to be set.
|
|
28
|
+
Set enable_reasoning=True when using a reasoning model (e.g. deepseek-reasoner).
|
|
27
29
|
"""
|
|
28
30
|
super().__init__(
|
|
29
31
|
model=model,
|
|
@@ -31,6 +33,7 @@ class DeepSeek_LLM(Base_LLM):
|
|
|
31
33
|
temperature=temperature,
|
|
32
34
|
top_p=top_p,
|
|
33
35
|
max_tokens=max_tokens,
|
|
36
|
+
enable_reasoning=enable_reasoning,
|
|
34
37
|
)
|
|
35
38
|
self._client = openai.OpenAI(
|
|
36
39
|
api_key=os.environ["DEEPSEEK_API_KEY"],
|
|
@@ -65,8 +68,8 @@ class DeepSeek_LLM(Base_LLM):
|
|
|
65
68
|
temperature: float | None = None,
|
|
66
69
|
top_p: float | None = None,
|
|
67
70
|
max_tokens: int | None = None,
|
|
68
|
-
) -> str:
|
|
69
|
-
"""Generate a model response from the
|
|
71
|
+
) -> tuple[str, str | None]:
|
|
72
|
+
"""Generate a model response from the DeepSeek API."""
|
|
70
73
|
response = self._client.chat.completions.create(
|
|
71
74
|
messages=cast(list[ChatCompletionMessageParam], input),
|
|
72
75
|
model=model,
|
|
@@ -74,5 +77,10 @@ class DeepSeek_LLM(Base_LLM):
|
|
|
74
77
|
top_p=top_p if top_p is not None else openai.omit,
|
|
75
78
|
max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
|
|
76
79
|
)
|
|
80
|
+
message = response.choices[0].message
|
|
81
|
+
|
|
82
|
+
# chain-of-thought from reasoning models (e.g. deepseek-reasoner); None otherwise
|
|
83
|
+
reasoning = getattr(message, "reasoning_content", None)
|
|
84
|
+
|
|
77
85
|
# cast to str as text completions always return string content
|
|
78
|
-
return cast(str,
|
|
86
|
+
return cast(str, message.content), reasoning
|
|
@@ -66,7 +66,7 @@ class Mistral_LLM(Base_LLM):
|
|
|
66
66
|
temperature: float | None = None,
|
|
67
67
|
top_p: float | None = None,
|
|
68
68
|
max_tokens: int | None = None,
|
|
69
|
-
) -> str:
|
|
69
|
+
) -> tuple[str, str | None]:
|
|
70
70
|
"""Generate a model response from the MistralAI API."""
|
|
71
71
|
response = self._client.chat.complete(
|
|
72
72
|
model=model,
|
|
@@ -75,4 +75,4 @@ class Mistral_LLM(Base_LLM):
|
|
|
75
75
|
top_p=top_p,
|
|
76
76
|
max_tokens=max_tokens if max_tokens is not None else client.UNSET,
|
|
77
77
|
)
|
|
78
|
-
return response.choices[0].message.content
|
|
78
|
+
return response.choices[0].message.content, None
|
|
@@ -65,7 +65,7 @@ class Nscale_LLM(Base_LLM):
|
|
|
65
65
|
temperature: float | None = None,
|
|
66
66
|
top_p: float | None = None,
|
|
67
67
|
max_tokens: int | None = None,
|
|
68
|
-
) -> str:
|
|
68
|
+
) -> tuple[str, str | None]:
|
|
69
69
|
"""Generate a model response from the OpenAI API."""
|
|
70
70
|
response = self._client.chat.completions.create(
|
|
71
71
|
messages=cast(list[ChatCompletionMessageParam], input),
|
|
@@ -75,4 +75,4 @@ class Nscale_LLM(Base_LLM):
|
|
|
75
75
|
max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
|
|
76
76
|
)
|
|
77
77
|
# cast to str as text completions always return string content
|
|
78
|
-
return cast(str, response.choices[0].message.content)
|
|
78
|
+
return cast(str, response.choices[0].message.content), None
|
|
@@ -61,7 +61,7 @@ class OpenAI_LLM(Base_LLM):
|
|
|
61
61
|
temperature: int | float | None = None,
|
|
62
62
|
top_p: int | float | None = None,
|
|
63
63
|
max_tokens: int | None = None,
|
|
64
|
-
) -> str:
|
|
64
|
+
) -> tuple[str, str | None]:
|
|
65
65
|
"""Generate a model response from the OpenAI API."""
|
|
66
66
|
self._client.responses.input_items
|
|
67
67
|
response = self._client.responses.create(
|
|
@@ -71,4 +71,4 @@ class OpenAI_LLM(Base_LLM):
|
|
|
71
71
|
top_p=top_p if top_p is not None else openai.omit,
|
|
72
72
|
max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
|
|
73
73
|
)
|
|
74
|
-
return response.output_text
|
|
74
|
+
return response.output_text, None
|
|
@@ -17,7 +17,7 @@ class GenerationProtocol(Protocol):
|
|
|
17
17
|
temperature: float | None = None,
|
|
18
18
|
top_p: float | None = None,
|
|
19
19
|
max_tokens: int | None = None,
|
|
20
|
-
) -> list[str]:
|
|
20
|
+
) -> list[str] | list[tuple[str, str | None]]:
|
|
21
21
|
"""
|
|
22
22
|
Generate model responses from the LLMs API.
|
|
23
23
|
"""
|
|
@@ -30,7 +30,7 @@ class GenerationProtocol(Protocol):
|
|
|
30
30
|
temperature: float | None = None,
|
|
31
31
|
top_p: float | None = None,
|
|
32
32
|
max_tokens: int | None = None,
|
|
33
|
-
) -> str:
|
|
33
|
+
) -> str | tuple[str, str | None]:
|
|
34
34
|
"""
|
|
35
35
|
Generate a model response from the LLMs API, in the ongoing chat.
|
|
36
36
|
"""
|
|
@@ -60,7 +60,7 @@ class TogetherAI_LLM(Base_LLM):
|
|
|
60
60
|
temperature: float | None = None,
|
|
61
61
|
top_p: float | None = None,
|
|
62
62
|
max_tokens: int | None = None,
|
|
63
|
-
) -> str:
|
|
63
|
+
) -> tuple[str, str | None]:
|
|
64
64
|
"""Generate a model response from the TogetherAI API."""
|
|
65
65
|
response = self._client.chat.completions.create(
|
|
66
66
|
model=model,
|
|
@@ -72,4 +72,4 @@ class TogetherAI_LLM(Base_LLM):
|
|
|
72
72
|
# cast to Any first as together doesn't publicly export the message type,
|
|
73
73
|
# then cast content to str as text completions always have it set
|
|
74
74
|
message = cast(Any, response.choices[0].message)
|
|
75
|
-
return cast(str, message.content)
|
|
75
|
+
return cast(str, message.content), None
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""API utilities for interfacing with the generation models."""
|
|
2
2
|
|
|
3
|
+
from typing import cast
|
|
4
|
+
|
|
3
5
|
from llm_cgr.defaults import DEFAULT_MODEL
|
|
4
6
|
from llm_cgr.llm.clients import get_llm
|
|
5
7
|
from llm_cgr.llm.prompts import BOOL_SYSTEM_PROMPT, LIST_SYSTEM_PROMPT
|
|
@@ -31,7 +33,8 @@ def generate(
|
|
|
31
33
|
max_tokens=max_tokens,
|
|
32
34
|
**generate_kwargs,
|
|
33
35
|
)
|
|
34
|
-
|
|
36
|
+
# enable_reasoning is False by default, so result is always a plain string
|
|
37
|
+
return cast(str, result)
|
|
35
38
|
|
|
36
39
|
|
|
37
40
|
def generate_list(
|
{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llm-codegen-research
|
|
3
|
-
Version: 2.14
|
|
3
|
+
Version: 2.15
|
|
4
4
|
Summary: Useful classes and methods for researching code-generation by LLMs.
|
|
5
5
|
Author-email: Lukas Twist <itsluketwist@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Tests for DeepSeek reasoning model support."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from llm_cgr.llm.clients.deepseek import DeepSeek_LLM
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# mark all tests in this file as api tests, so they can be excluded in ci
|
|
9
|
+
pytestmark = pytest.mark.api
|
|
10
|
+
|
|
11
|
+
# standard model returns no chain-of-thought; reasoner model does
|
|
12
|
+
CHAT_MODEL = "deepseek-chat"
|
|
13
|
+
REASONER_MODEL = "deepseek-reasoner"
|
|
14
|
+
|
|
15
|
+
USER_PROMPT = "How many r's are in 'strawberry'?"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_generate_no_reasoning():
|
|
19
|
+
"""
|
|
20
|
+
Test that generate returns plain strings when enable_reasoning is False (default).
|
|
21
|
+
"""
|
|
22
|
+
llm = DeepSeek_LLM(model=CHAT_MODEL)
|
|
23
|
+
results = llm.generate(user=USER_PROMPT)
|
|
24
|
+
|
|
25
|
+
assert isinstance(results, list)
|
|
26
|
+
assert len(results) == 1
|
|
27
|
+
# result should be a plain string, not a tuple
|
|
28
|
+
assert isinstance(results[0], str)
|
|
29
|
+
assert len(results[0]) > 0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_generate_with_reasoning_returns_tuples():
|
|
33
|
+
"""
|
|
34
|
+
Test that generate returns (response, reasoning) tuples when enable_reasoning is True.
|
|
35
|
+
"""
|
|
36
|
+
llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
|
|
37
|
+
results = llm.generate(user=USER_PROMPT)
|
|
38
|
+
|
|
39
|
+
assert isinstance(results, list)
|
|
40
|
+
assert len(results) == 1
|
|
41
|
+
|
|
42
|
+
response, reasoning = results[0]
|
|
43
|
+
|
|
44
|
+
# response should be a non-empty string
|
|
45
|
+
assert isinstance(response, str)
|
|
46
|
+
assert len(response) > 0
|
|
47
|
+
|
|
48
|
+
# the reasoner model should always produce chain-of-thought
|
|
49
|
+
assert isinstance(reasoning, str)
|
|
50
|
+
assert len(reasoning) > 0
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_generate_non_reasoning_model_has_no_reasoning():
|
|
54
|
+
"""
|
|
55
|
+
Test that a standard (non-reasoner) model returns None for reasoning even when enabled.
|
|
56
|
+
"""
|
|
57
|
+
llm = DeepSeek_LLM(model=CHAT_MODEL, enable_reasoning=True)
|
|
58
|
+
results = llm.generate(user=USER_PROMPT)
|
|
59
|
+
|
|
60
|
+
response, reasoning = results[0]
|
|
61
|
+
|
|
62
|
+
assert isinstance(response, str)
|
|
63
|
+
assert len(response) > 0
|
|
64
|
+
# deepseek-chat does not produce reasoning content
|
|
65
|
+
assert reasoning is None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_chat_no_reasoning():
|
|
69
|
+
"""
|
|
70
|
+
Test that chat returns a plain string and history has no reasoning_content
|
|
71
|
+
when enable_reasoning is False (default).
|
|
72
|
+
"""
|
|
73
|
+
llm = DeepSeek_LLM(model=CHAT_MODEL)
|
|
74
|
+
response = llm.chat(user=USER_PROMPT)
|
|
75
|
+
|
|
76
|
+
assert isinstance(response, str)
|
|
77
|
+
assert len(response) > 0
|
|
78
|
+
|
|
79
|
+
# history entries should each have exactly role and content
|
|
80
|
+
history = llm.history
|
|
81
|
+
assert all("reasoning_content" not in msg for msg in history)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_chat_with_reasoning_returns_tuple():
|
|
85
|
+
"""
|
|
86
|
+
Test that chat returns a (response, reasoning) tuple when enable_reasoning is True.
|
|
87
|
+
"""
|
|
88
|
+
llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
|
|
89
|
+
result = llm.chat(user=USER_PROMPT)
|
|
90
|
+
|
|
91
|
+
assert isinstance(result, tuple)
|
|
92
|
+
response, reasoning = result
|
|
93
|
+
|
|
94
|
+
assert isinstance(response, str)
|
|
95
|
+
assert len(response) > 0
|
|
96
|
+
|
|
97
|
+
assert isinstance(reasoning, str)
|
|
98
|
+
assert len(reasoning) > 0
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_chat_reasoning_stored_in_history():
|
|
102
|
+
"""
|
|
103
|
+
Test that reasoning is stored on the assistant history entry when enable_reasoning is True.
|
|
104
|
+
"""
|
|
105
|
+
llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
|
|
106
|
+
llm.chat(user=USER_PROMPT)
|
|
107
|
+
|
|
108
|
+
history = llm.history
|
|
109
|
+
# find the assistant message
|
|
110
|
+
assistant_msgs = [msg for msg in history if msg["role"] == "assistant"]
|
|
111
|
+
assert len(assistant_msgs) == 1
|
|
112
|
+
|
|
113
|
+
assistant_msg = assistant_msgs[0]
|
|
114
|
+
assert "reasoning_content" in assistant_msg
|
|
115
|
+
assert isinstance(assistant_msg["reasoning_content"], str)
|
|
116
|
+
assert len(assistant_msg["reasoning_content"]) > 0
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_chat_multi_turn_reasoning_stored_per_turn():
|
|
120
|
+
"""
|
|
121
|
+
Test that reasoning is captured and stored for each turn in a multi-turn chat.
|
|
122
|
+
"""
|
|
123
|
+
llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
|
|
124
|
+
|
|
125
|
+
llm.chat(user="What is 2 + 2?")
|
|
126
|
+
llm.chat(user="And what is that result multiplied by 3?")
|
|
127
|
+
|
|
128
|
+
history = llm.history
|
|
129
|
+
assistant_msgs = [msg for msg in history if msg["role"] == "assistant"]
|
|
130
|
+
assert len(assistant_msgs) == 2
|
|
131
|
+
|
|
132
|
+
# both assistant turns should have reasoning attached
|
|
133
|
+
for msg in assistant_msgs:
|
|
134
|
+
assert "reasoning_content" in msg
|
|
135
|
+
assert isinstance(msg["reasoning_content"], str)
|
|
136
|
+
assert len(msg["reasoning_content"]) > 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/__init__.py
RENAMED
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/code_data.py
RENAMED
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/javascript.py
RENAMED
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/python.py
RENAMED
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/analyse/languages/rust.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/openai_tool.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|