llm-codegen-research 2.15__tar.gz → 2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/PKG-INFO +2 -2
  2. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/README.md +1 -1
  3. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/defaults.py +4 -1
  4. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/__init__.py +6 -1
  5. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/anthropic.py +52 -6
  6. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/deepseek.py +2 -0
  7. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/mistral.py +24 -1
  8. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/nscale.py +5 -0
  9. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/openai.py +5 -0
  10. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/together.py +26 -1
  11. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/generate.py +36 -4
  12. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/PKG-INFO +2 -2
  13. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/SOURCES.txt +0 -1
  14. llm_codegen_research-2.15/tests/test_llm_deepseek_reasoning.py +0 -136
  15. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/LICENSE +0 -0
  16. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/pyproject.toml +0 -0
  17. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/setup.cfg +0 -0
  18. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/__init__.py +0 -0
  19. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/analyse/__init__.py +0 -0
  20. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/analyse/classes.py +0 -0
  21. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/__init__.py +0 -0
  22. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/code_data.py +0 -0
  23. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/javascript.py +0 -0
  24. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/python.py +0 -0
  25. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/analyse/languages/rust.py +0 -0
  26. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/analyse/regexes.py +0 -0
  27. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/decorators.py +0 -0
  28. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/enums.py +0 -0
  29. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/json_utils.py +0 -0
  30. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/__init__.py +0 -0
  31. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/base.py +0 -0
  32. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/openai_tool.py +0 -0
  33. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/protocol.py +0 -0
  34. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/llm/prompts.py +0 -0
  35. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/py.typed +0 -0
  36. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/scripts/test_cuda.py +0 -0
  37. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_cgr/timeout.py +0 -0
  38. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/dependency_links.txt +0 -0
  39. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/entry_points.txt +0 -0
  40. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/requires.txt +0 -0
  41. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/top_level.txt +0 -0
  42. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/tests/test_enums.py +0 -0
  43. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/tests/test_json_utils.py +0 -0
  44. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/tests/test_llm_api.py +0 -0
  45. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/tests/test_llm_local.py +0 -0
  46. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/tests/test_llm_tool.py +0 -0
  47. {llm_codegen_research-2.15 → llm_codegen_research-2.16}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-codegen-research
3
- Version: 2.15
3
+ Version: 2.16
4
4
  Summary: Useful classes and methods for researching code-generation by LLMs.
5
5
  Author-email: Lukas Twist <itsluketwist@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -158,7 +158,7 @@ uv add openai
158
158
  Or to upgrade dependencies:
159
159
 
160
160
  ```shell
161
- uv sync --upgrade
161
+ uv sync --extra api --upgrade
162
162
  ```
163
163
 
164
164
  Check typings with `ty`:
@@ -138,7 +138,7 @@ uv add openai
138
138
  Or to upgrade dependencies:
139
139
 
140
140
  ```shell
141
- uv sync --upgrade
141
+ uv sync --extra api --upgrade
142
142
  ```
143
143
 
144
144
  Check typings with `ty`:
@@ -7,4 +7,7 @@ DEFAULT_MODEL = "gpt-4.1-mini-2025-04-14"
7
7
  DEFAULT_CODEBLOCK_LANGUAGE = "python"
8
8
 
9
9
  # the default max_tokens to be used when prompting models
10
- DEFAULT_MAX_TOKENS = 2000
10
+ DEFAULT_MAX_TOKENS = 4096
11
+
12
+ # default token budget for anthropic extended thinking (minimum allowed is 1024)
13
+ DEFAULT_THINKING_BUDGET = 2048
@@ -33,6 +33,7 @@ def get_llm(
33
33
  top_p: float | None = None,
34
34
  max_tokens: int | None = None,
35
35
  provider: str | None = None,
36
+ enable_reasoning: bool = False,
36
37
  tools: list[Tool] | None = None,
37
38
  max_tool_iterations: int = MAX_TOOL_ITERATIONS,
38
39
  max_tool_calls: int = MAX_TOOL_CALLS,
@@ -41,7 +42,8 @@ def get_llm(
41
42
  Initialise the correct LLM client for the given model.
42
43
 
43
44
  If tools are provided, returns an OpenAI_Tool_LLM instance. Tool calls
44
- are currently only supported for OpenAI models.
45
+ are currently only supported for OpenAI models. enable_reasoning is only
46
+ supported by Anthropic, DeepSeek, Mistral, and TogetherAI models.
45
47
  """
46
48
  llm_class: type[Base_LLM]
47
49
  if provider is not None:
@@ -63,6 +65,8 @@ def get_llm(
63
65
  raise NotImplementedError(
64
66
  "Tool calls are only supported for OpenAI models."
65
67
  )
68
+ if enable_reasoning:
69
+ raise ValueError("OpenAI_Tool_LLM does not support enable_reasoning.")
66
70
  return OpenAI_Tool_LLM(
67
71
  tools=tools,
68
72
  model=model,
@@ -80,6 +84,7 @@ def get_llm(
80
84
  temperature=temperature,
81
85
  top_p=top_p,
82
86
  max_tokens=max_tokens,
87
+ enable_reasoning=enable_reasoning,
83
88
  )
84
89
 
85
90
 
@@ -3,9 +3,14 @@
3
3
  from typing import Any, cast
4
4
 
5
5
  import anthropic
6
- from anthropic.types import MessageParam, TextBlock
6
+ from anthropic.types import (
7
+ MessageParam,
8
+ TextBlock,
9
+ ThinkingBlock,
10
+ ThinkingConfigEnabledParam,
11
+ )
7
12
 
8
- from llm_cgr.defaults import DEFAULT_MAX_TOKENS
13
+ from llm_cgr.defaults import DEFAULT_MAX_TOKENS, DEFAULT_THINKING_BUDGET
9
14
  from llm_cgr.llm.clients.base import Base_LLM
10
15
 
11
16
 
@@ -19,11 +24,14 @@ class Anthropic_LLM(Base_LLM):
19
24
  temperature: float | None = None,
20
25
  top_p: float | None = None,
21
26
  max_tokens: int | None = None,
27
+ enable_reasoning: bool = False,
22
28
  ) -> None:
23
29
  """
24
30
  Initialise the Anthropic client.
25
31
 
26
32
  Requires the ANTHROPIC_API_KEY environment variable to be set.
33
+ Set enable_reasoning=True to enable extended thinking on supported models
34
+ (e.g. claude-sonnet-4-5).
27
35
  """
28
36
  super().__init__(
29
37
  model=model,
@@ -31,6 +39,7 @@ class Anthropic_LLM(Base_LLM):
31
39
  temperature=temperature,
32
40
  top_p=top_p,
33
41
  max_tokens=max_tokens,
42
+ enable_reasoning=enable_reasoning,
34
43
  )
35
44
  self._client = anthropic.Anthropic()
36
45
 
@@ -68,13 +77,50 @@ class Anthropic_LLM(Base_LLM):
68
77
  max_tokens: int | None = None,
69
78
  ) -> tuple[str, str | None]:
70
79
  """Generate a model response from the Anthropic API."""
80
+ # extended thinking is incompatible with custom temperature/top_p
81
+ thinking = (
82
+ ThinkingConfigEnabledParam(
83
+ type="enabled",
84
+ budget_tokens=DEFAULT_THINKING_BUDGET,
85
+ )
86
+ if self._enable_reasoning
87
+ else anthropic.omit
88
+ )
89
+ # custom temperature/top_p are not supported alongside extended thinking,
90
+ # and the api rejects requests that set both temperature and top_p
91
+ _temperature = (
92
+ temperature
93
+ if temperature is not None and not self._enable_reasoning
94
+ else anthropic.omit
95
+ )
96
+ _top_p = (
97
+ top_p
98
+ if top_p is not None
99
+ and not self._enable_reasoning
100
+ and _temperature is anthropic.omit
101
+ else anthropic.omit
102
+ )
103
+
71
104
  response = self._client.messages.create(
72
105
  model=model,
73
106
  system=system or self._system or anthropic.omit,
74
107
  messages=cast(list[MessageParam], input),
75
- temperature=temperature if temperature is not None else anthropic.omit,
76
- top_p=top_p if top_p is not None else anthropic.omit,
108
+ temperature=_temperature,
109
+ top_p=_top_p,
77
110
  max_tokens=max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS,
111
+ thinking=thinking,
112
+ )
113
+
114
+ # collect chain-of-thought from any thinking blocks; None if not present
115
+ thinking_blocks = [
116
+ block.thinking
117
+ for block in response.content
118
+ if isinstance(block, ThinkingBlock)
119
+ ]
120
+ reasoning = "\n".join(thinking_blocks) if thinking_blocks else None
121
+
122
+ # the final answer is always returned as a text block
123
+ text_block = next(
124
+ block for block in response.content if isinstance(block, TextBlock)
78
125
  )
79
- # cast to TextBlock as non-tool, non-thinking requests always return text
80
- return cast(TextBlock, response.content[0]).text, None
126
+ return text_block.text, reasoning
@@ -76,6 +76,8 @@ class DeepSeek_LLM(Base_LLM):
76
76
  temperature=temperature if temperature is not None else openai.omit,
77
77
  top_p=top_p if top_p is not None else openai.omit,
78
78
  max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
79
+ reasoning_effort="high",
80
+ extra_body={"thinking": {"type": "enabled"}},
79
81
  )
80
82
  message = response.choices[0].message
81
83
 
@@ -4,6 +4,7 @@ import os
4
4
  from typing import Any
5
5
 
6
6
  from mistralai import client
7
+ from mistralai.client.models import TextChunk, ThinkChunk
7
8
 
8
9
  from llm_cgr.llm.clients.base import Base_LLM
9
10
 
@@ -18,11 +19,14 @@ class Mistral_LLM(Base_LLM):
18
19
  temperature: float | None = None,
19
20
  top_p: float | None = None,
20
21
  max_tokens: int | None = None,
22
+ enable_reasoning: bool = False,
21
23
  ) -> None:
22
24
  """
23
25
  Initialise the Mistral client.
24
26
 
25
27
  Requires the MISTRAL_API_KEY environment variable to be set.
28
+ Set enable_reasoning=True to request chain-of-thought from reasoning
29
+ models (e.g. magistral-medium-latest).
26
30
  """
27
31
  super().__init__(
28
32
  model=model,
@@ -30,6 +34,7 @@ class Mistral_LLM(Base_LLM):
30
34
  temperature=temperature,
31
35
  top_p=top_p,
32
36
  max_tokens=max_tokens,
37
+ enable_reasoning=enable_reasoning,
33
38
  )
34
39
  self._client = client.Mistral(
35
40
  api_key=os.environ["MISTRAL_API_KEY"],
@@ -74,5 +79,23 @@ class Mistral_LLM(Base_LLM):
74
79
  temperature=temperature if temperature is not None else client.UNSET,
75
80
  top_p=top_p,
76
81
  max_tokens=max_tokens if max_tokens is not None else client.UNSET,
82
+ reasoning_effort="high" if self._enable_reasoning else client.UNSET,
77
83
  )
78
- return response.choices[0].message.content, None
84
+ content = response.choices[0].message.content
85
+
86
+ # plain string content means no reasoning chunks were returned
87
+ if isinstance(content, str):
88
+ return content, None
89
+
90
+ # otherwise content is a list of chunks: thinking and final text
91
+ reasoning_parts = [
92
+ inner.text
93
+ for chunk in content
94
+ if isinstance(chunk, ThinkChunk)
95
+ for inner in chunk.thinking
96
+ if isinstance(inner, TextChunk)
97
+ ]
98
+ text_parts = [chunk.text for chunk in content if isinstance(chunk, TextChunk)]
99
+
100
+ reasoning = "\n".join(reasoning_parts) if reasoning_parts else None
101
+ return "\n".join(text_parts), reasoning
@@ -19,18 +19,23 @@ class Nscale_LLM(Base_LLM):
19
19
  temperature: float | None = None,
20
20
  top_p: float | None = None,
21
21
  max_tokens: int | None = None,
22
+ enable_reasoning: bool = False,
22
23
  ) -> None:
23
24
  """
24
25
  Initialise the NSCALE client.
25
26
 
26
27
  Requires the NSCALE_API_KEY environment variable to be set.
27
28
  """
29
+ if enable_reasoning:
30
+ raise ValueError("Nscale_LLM does not support enable_reasoning.")
31
+
28
32
  super().__init__(
29
33
  model=model,
30
34
  system=system,
31
35
  temperature=temperature,
32
36
  top_p=top_p,
33
37
  max_tokens=max_tokens,
38
+ enable_reasoning=enable_reasoning,
34
39
  )
35
40
  self._client = openai.OpenAI(
36
41
  api_key=os.environ["NSCALE_API_KEY"],
@@ -18,18 +18,23 @@ class OpenAI_LLM(Base_LLM):
18
18
  temperature: float | None = None,
19
19
  top_p: float | None = None,
20
20
  max_tokens: int | None = None,
21
+ enable_reasoning: bool = False,
21
22
  ) -> None:
22
23
  """
23
24
  Initialise the OpenAI client.
24
25
 
25
26
  Requires the OPENAI_API_KEY environment variable to be set.
26
27
  """
28
+ if enable_reasoning:
29
+ raise ValueError("OpenAI_LLM does not support enable_reasoning.")
30
+
27
31
  super().__init__(
28
32
  model=model,
29
33
  system=system,
30
34
  temperature=temperature,
31
35
  top_p=top_p,
32
36
  max_tokens=max_tokens,
37
+ enable_reasoning=enable_reasoning,
33
38
  )
34
39
  self._client = openai.OpenAI()
35
40
 
@@ -1,5 +1,6 @@
1
1
  """Class to access LLMs via the TogetherAI API."""
2
2
 
3
+ import re
3
4
  from typing import Any, cast
4
5
 
5
6
  import together
@@ -7,6 +8,11 @@ import together
7
8
  from llm_cgr.llm.clients.base import Base_LLM
8
9
 
9
10
 
11
+ # matches a <think>...</think> block at the start of a response, used by
12
+ # models that embed their reasoning trace directly in the content
13
+ _THINK_BLOCK = re.compile(r"\A<think>(.*?)</think>\s*", re.DOTALL)
14
+
15
+
10
16
  class TogetherAI_LLM(Base_LLM):
11
17
  """Class to access LLMs via the TogetherAI API."""
12
18
 
@@ -17,11 +23,13 @@ class TogetherAI_LLM(Base_LLM):
17
23
  temperature: float | None = None,
18
24
  top_p: float | None = None,
19
25
  max_tokens: int | None = None,
26
+ enable_reasoning: bool = False,
20
27
  ) -> None:
21
28
  """
22
29
  Initialise the TogetherAI client.
23
30
 
24
31
  Requires the TOGETHER_API_KEY environment variable to be set.
32
+ Set enable_reasoning=True when using a reasoning model (e.g. deepseek-ai/DeepSeek-R1).
25
33
  """
26
34
  super().__init__(
27
35
  model=model,
@@ -29,6 +37,7 @@ class TogetherAI_LLM(Base_LLM):
29
37
  temperature=temperature,
30
38
  top_p=top_p,
31
39
  max_tokens=max_tokens,
40
+ enable_reasoning=enable_reasoning,
32
41
  )
33
42
  self._client = together.Together()
34
43
 
@@ -72,4 +81,20 @@ class TogetherAI_LLM(Base_LLM):
72
81
  # cast to Any first as together doesn't publicly export the message type,
73
82
  # then cast content to str as text completions always have it set
74
83
  message = cast(Any, response.choices[0].message)
75
- return cast(str, message.content), None
84
+ content = cast(str, message.content)
85
+
86
+ # chain-of-thought from reasoning models: most (e.g. DeepSeek-R1) use
87
+ # reasoning_content, some (e.g. Kimi-K2.6) use reasoning; None otherwise
88
+ reasoning = getattr(message, "reasoning_content", None) or getattr(
89
+ message, "reasoning", None
90
+ )
91
+
92
+ # some models embed their reasoning as a <think>...</think> block at
93
+ # the start of content instead of a separate field; pull it out
94
+ if reasoning is None:
95
+ think_match = _THINK_BLOCK.match(content)
96
+ if think_match:
97
+ reasoning = think_match.group(1).strip()
98
+ content = content[think_match.end() :]
99
+
100
+ return content, reasoning
@@ -1,12 +1,13 @@
1
1
  """API utilities for interfacing with the generation models."""
2
2
 
3
- from typing import cast
3
+ from typing import Literal, overload
4
4
 
5
5
  from llm_cgr.defaults import DEFAULT_MODEL
6
6
  from llm_cgr.llm.clients import get_llm
7
7
  from llm_cgr.llm.prompts import BOOL_SYSTEM_PROMPT, LIST_SYSTEM_PROMPT
8
8
 
9
9
 
10
+ @overload
10
11
  def generate(
11
12
  user: str,
12
13
  model: str = DEFAULT_MODEL,
@@ -15,15 +16,47 @@ def generate(
15
16
  top_p: float | None = None,
16
17
  max_tokens: int | None = None,
17
18
  provider: str | None = None,
19
+ enable_reasoning: Literal[False] = False,
18
20
  **generate_kwargs,
19
- ) -> str:
21
+ ) -> str: ...
22
+
23
+
24
+ @overload
25
+ def generate(
26
+ user: str,
27
+ model: str = DEFAULT_MODEL,
28
+ system: str | None = None,
29
+ temperature: float | None = None,
30
+ top_p: float | None = None,
31
+ max_tokens: int | None = None,
32
+ provider: str | None = None,
33
+ enable_reasoning: Literal[True] = True,
34
+ **generate_kwargs,
35
+ ) -> tuple[str, str | None]: ...
36
+
37
+
38
+ def generate(
39
+ user: str,
40
+ model: str = DEFAULT_MODEL,
41
+ system: str | None = None,
42
+ temperature: float | None = None,
43
+ top_p: float | None = None,
44
+ max_tokens: int | None = None,
45
+ provider: str | None = None,
46
+ enable_reasoning: bool = False,
47
+ **generate_kwargs,
48
+ ) -> str | tuple[str, str | None]:
20
49
  """
21
50
  Simple function to quickly prompt a model for a response.
51
+
52
+ When enable_reasoning is True, returns a (response, reasoning) tuple instead
53
+ of a plain string.
22
54
  """
23
55
  client = get_llm(
24
56
  model=model,
25
57
  system=system,
26
58
  provider=provider,
59
+ enable_reasoning=enable_reasoning,
27
60
  )
28
61
  [result] = client.generate(
29
62
  user=user,
@@ -33,8 +66,7 @@ def generate(
33
66
  max_tokens=max_tokens,
34
67
  **generate_kwargs,
35
68
  )
36
- # enable_reasoning is False by default, so result is always a plain string
37
- return cast(str, result)
69
+ return result
38
70
 
39
71
 
40
72
  def generate_list(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llm-codegen-research
3
- Version: 2.15
3
+ Version: 2.16
4
4
  Summary: Useful classes and methods for researching code-generation by LLMs.
5
5
  Author-email: Lukas Twist <itsluketwist@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -158,7 +158,7 @@ uv add openai
158
158
  Or to upgrade dependencies:
159
159
 
160
160
  ```shell
161
- uv sync --upgrade
161
+ uv sync --extra api --upgrade
162
162
  ```
163
163
 
164
164
  Check typings with `ty`:
@@ -39,7 +39,6 @@ src/llm_codegen_research.egg-info/top_level.txt
39
39
  tests/test_enums.py
40
40
  tests/test_json_utils.py
41
41
  tests/test_llm_api.py
42
- tests/test_llm_deepseek_reasoning.py
43
42
  tests/test_llm_local.py
44
43
  tests/test_llm_tool.py
45
44
  tests/test_utils.py
@@ -1,136 +0,0 @@
1
- """Tests for DeepSeek reasoning model support."""
2
-
3
- import pytest
4
-
5
- from llm_cgr.llm.clients.deepseek import DeepSeek_LLM
6
-
7
-
8
- # mark all tests in this file as api tests, so they can be excluded in ci
9
- pytestmark = pytest.mark.api
10
-
11
- # standard model returns no chain-of-thought; reasoner model does
12
- CHAT_MODEL = "deepseek-chat"
13
- REASONER_MODEL = "deepseek-reasoner"
14
-
15
- USER_PROMPT = "How many r's are in 'strawberry'?"
16
-
17
-
18
- def test_generate_no_reasoning():
19
- """
20
- Test that generate returns plain strings when enable_reasoning is False (default).
21
- """
22
- llm = DeepSeek_LLM(model=CHAT_MODEL)
23
- results = llm.generate(user=USER_PROMPT)
24
-
25
- assert isinstance(results, list)
26
- assert len(results) == 1
27
- # result should be a plain string, not a tuple
28
- assert isinstance(results[0], str)
29
- assert len(results[0]) > 0
30
-
31
-
32
- def test_generate_with_reasoning_returns_tuples():
33
- """
34
- Test that generate returns (response, reasoning) tuples when enable_reasoning is True.
35
- """
36
- llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
37
- results = llm.generate(user=USER_PROMPT)
38
-
39
- assert isinstance(results, list)
40
- assert len(results) == 1
41
-
42
- response, reasoning = results[0]
43
-
44
- # response should be a non-empty string
45
- assert isinstance(response, str)
46
- assert len(response) > 0
47
-
48
- # the reasoner model should always produce chain-of-thought
49
- assert isinstance(reasoning, str)
50
- assert len(reasoning) > 0
51
-
52
-
53
- def test_generate_non_reasoning_model_has_no_reasoning():
54
- """
55
- Test that a standard (non-reasoner) model returns None for reasoning even when enabled.
56
- """
57
- llm = DeepSeek_LLM(model=CHAT_MODEL, enable_reasoning=True)
58
- results = llm.generate(user=USER_PROMPT)
59
-
60
- response, reasoning = results[0]
61
-
62
- assert isinstance(response, str)
63
- assert len(response) > 0
64
- # deepseek-chat does not produce reasoning content
65
- assert reasoning is None
66
-
67
-
68
- def test_chat_no_reasoning():
69
- """
70
- Test that chat returns a plain string and history has no reasoning_content
71
- when enable_reasoning is False (default).
72
- """
73
- llm = DeepSeek_LLM(model=CHAT_MODEL)
74
- response = llm.chat(user=USER_PROMPT)
75
-
76
- assert isinstance(response, str)
77
- assert len(response) > 0
78
-
79
- # history entries should each have exactly role and content
80
- history = llm.history
81
- assert all("reasoning_content" not in msg for msg in history)
82
-
83
-
84
- def test_chat_with_reasoning_returns_tuple():
85
- """
86
- Test that chat returns a (response, reasoning) tuple when enable_reasoning is True.
87
- """
88
- llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
89
- result = llm.chat(user=USER_PROMPT)
90
-
91
- assert isinstance(result, tuple)
92
- response, reasoning = result
93
-
94
- assert isinstance(response, str)
95
- assert len(response) > 0
96
-
97
- assert isinstance(reasoning, str)
98
- assert len(reasoning) > 0
99
-
100
-
101
- def test_chat_reasoning_stored_in_history():
102
- """
103
- Test that reasoning is stored on the assistant history entry when enable_reasoning is True.
104
- """
105
- llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
106
- llm.chat(user=USER_PROMPT)
107
-
108
- history = llm.history
109
- # find the assistant message
110
- assistant_msgs = [msg for msg in history if msg["role"] == "assistant"]
111
- assert len(assistant_msgs) == 1
112
-
113
- assistant_msg = assistant_msgs[0]
114
- assert "reasoning_content" in assistant_msg
115
- assert isinstance(assistant_msg["reasoning_content"], str)
116
- assert len(assistant_msg["reasoning_content"]) > 0
117
-
118
-
119
- def test_chat_multi_turn_reasoning_stored_per_turn():
120
- """
121
- Test that reasoning is captured and stored for each turn in a multi-turn chat.
122
- """
123
- llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
124
-
125
- llm.chat(user="What is 2 + 2?")
126
- llm.chat(user="And what is that result multiplied by 3?")
127
-
128
- history = llm.history
129
- assistant_msgs = [msg for msg in history if msg["role"] == "assistant"]
130
- assert len(assistant_msgs) == 2
131
-
132
- # both assistant turns should have reasoning attached
133
- for msg in assistant_msgs:
134
- assert "reasoning_content" in msg
135
- assert isinstance(msg["reasoning_content"], str)
136
- assert len(msg["reasoning_content"]) > 0