PyPI - llm-codegen-research - Versions diffs - 2.14__tar.gz → 2.15__tar.gz - Mend

llm-codegen-research 2.14 (tar.gz) → 2.15 (tar.gz)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llm-codegen-research
-Version: 2.14
+Version: 2.15
 Summary: Useful classes and methods for researching code-generation by LLMs.
 Author-email: Lukas Twist <itsluketwist@gmail.com>
 Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/anthropic.py RENAMED Viewed

@@ -66,7 +66,7 @@ class Anthropic_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the Anthropic API."""
         response = self._client.messages.create(
             model=model,
@@ -77,4 +77,4 @@ class Anthropic_LLM(Base_LLM):
             max_tokens=max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS,
         )
         # cast to TextBlock as non-tool, non-thinking requests always return text
-        return cast(TextBlock, response.content[0]).text
+        return cast(TextBlock, response.content[0]).text, None

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/base.py RENAMED Viewed

@@ -1,3 +1,5 @@
+"""Base class for LLM API clients."""
 from abc import ABC, abstractmethod
 from typing import Any
@@ -12,9 +14,13 @@ class Base_LLM(ABC):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the LLM client.
+        When enable_reasoning is True, generate() and chat() include chain-of-thought
+        alongside responses, and reasoning is stored in the chat history.
         """
         self._model = model
         self._system = system
@@ -24,6 +30,7 @@ class Base_LLM(ABC):
         self._top_p = top_p
         self._max_tokens = max_tokens
+        self._enable_reasoning = enable_reasoning
         self._history: list[dict[str, Any]] | None = None
     def generate(
@@ -35,9 +42,12 @@ class Base_LLM(ABC):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> list[str]:
+    ) -> list[str] | list[tuple[str, str | None]]:
         """
         Generate model responses from the LLMs API.
+        When enable_reasoning is True, returns a list of (response, reasoning) tuples.
+        When False, returns a list of response strings.
         """
         _model = model or self._model
         if _model is None:
@@ -48,16 +58,19 @@ class Base_LLM(ABC):
             system=system or self._system,
         )
-        _generations = []
+        _generations: list[Any] = []
         for _ in range(samples):
-            response = self._get_response(
+            response, reasoning = self._get_response(
                 input=messages,
                 model=_model,
                 temperature=temperature or self._temperature,
                 top_p=top_p or self._top_p,
                 max_tokens=max_tokens or self._max_tokens,
             )
-            _generations.append(response)
+            if self._enable_reasoning:
+                _generations.append((response, reasoning))
+            else:
+                _generations.append(response)
         return _generations
@@ -69,9 +82,12 @@ class Base_LLM(ABC):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> str | tuple[str, str | None]:
         """
         Generate a model response from the LLMs API, in the ongoing chat.
+        When enable_reasoning is True, reasoning is stored in the history and the
+        return value is a (response, reasoning) tuple instead of a plain string.
         """
         _model = model or self._model
         if _model is None:
@@ -92,7 +108,7 @@ class Base_LLM(ABC):
                 )
             )
-        response = self._get_response(
+        response, reasoning = self._get_response(
             input=self._history,
             system=system,
             model=_model,
@@ -101,13 +117,14 @@ class Base_LLM(ABC):
             max_tokens=max_tokens or self._max_tokens,
         )
-        # update the history and return
-        self._history.append(
-            self._build_message(
-                role="assistant",
-                content=response,
-            )
-        )
+        # build the assistant history entry, attaching reasoning if present
+        assistant_message = self._build_message(role="assistant", content=response)
+        if self._enable_reasoning and reasoning is not None:
+            assistant_message["reasoning_content"] = reasoning
+        self._history.append(assistant_message)
+        if self._enable_reasoning:
+            return response, reasoning
         return response
     @property
@@ -146,9 +163,10 @@ class Base_LLM(ABC):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """
         Generate a model response from the LLM API.
-        Returns the text response to the prompt.
+        Returns a (response, reasoning) tuple; reasoning is None for models that
+        do not produce chain-of-thought output.
         """

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/deepseek.py RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Class to access LLMs via the OpenAI API."""
+"""Class to access LLMs via the DeepSeek API."""
 import os
 from typing import Any, cast
@@ -19,11 +19,13 @@ class DeepSeek_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the DeepSeek client.
         Requires the DEEPSEEK_API_KEY environment variable to be set.
+        Set enable_reasoning=True when using a reasoning model (e.g. deepseek-reasoner).
         """
         super().__init__(
             model=model,
@@ -31,6 +33,7 @@ class DeepSeek_LLM(Base_LLM):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
+            enable_reasoning=enable_reasoning,
         )
         self._client = openai.OpenAI(
             api_key=os.environ["DEEPSEEK_API_KEY"],
@@ -65,8 +68,8 @@ class DeepSeek_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
-        """Generate a model response from the OpenAI API."""
+    ) -> tuple[str, str | None]:
+        """Generate a model response from the DeepSeek API."""
         response = self._client.chat.completions.create(
             messages=cast(list[ChatCompletionMessageParam], input),
             model=model,
@@ -74,5 +77,10 @@ class DeepSeek_LLM(Base_LLM):
             top_p=top_p if top_p is not None else openai.omit,
             max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
         )
+        message = response.choices[0].message
+        # chain-of-thought from reasoning models (e.g. deepseek-reasoner); None otherwise
+        reasoning = getattr(message, "reasoning_content", None)
         # cast to str as text completions always return string content
-        return cast(str, response.choices[0].message.content)
+        return cast(str, message.content), reasoning

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/mistral.py RENAMED Viewed

@@ -66,7 +66,7 @@ class Mistral_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the MistralAI API."""
         response = self._client.chat.complete(
             model=model,
@@ -75,4 +75,4 @@ class Mistral_LLM(Base_LLM):
             top_p=top_p,
             max_tokens=max_tokens if max_tokens is not None else client.UNSET,
         )
-        return response.choices[0].message.content
+        return response.choices[0].message.content, None

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/nscale.py RENAMED Viewed

@@ -65,7 +65,7 @@ class Nscale_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the OpenAI API."""
         response = self._client.chat.completions.create(
             messages=cast(list[ChatCompletionMessageParam], input),
@@ -75,4 +75,4 @@ class Nscale_LLM(Base_LLM):
             max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
         )
         # cast to str as text completions always return string content
-        return cast(str, response.choices[0].message.content)
+        return cast(str, response.choices[0].message.content), None

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/openai.py RENAMED Viewed

@@ -61,7 +61,7 @@ class OpenAI_LLM(Base_LLM):
         temperature: int | float | None = None,
         top_p: int | float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the OpenAI API."""
         self._client.responses.input_items
         response = self._client.responses.create(
@@ -71,4 +71,4 @@ class OpenAI_LLM(Base_LLM):
             top_p=top_p if top_p is not None else openai.omit,
             max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
         )
-        return response.output_text
+        return response.output_text, None

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/protocol.py RENAMED Viewed

@@ -17,7 +17,7 @@ class GenerationProtocol(Protocol):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> list[str]:
+    ) -> list[str] | list[tuple[str, str | None]]:
         """
         Generate model responses from the LLMs API.
         """
@@ -30,7 +30,7 @@ class GenerationProtocol(Protocol):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> str | tuple[str, str | None]:
         """
         Generate a model response from the LLMs API, in the ongoing chat.
         """

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/clients/together.py RENAMED Viewed

@@ -60,7 +60,7 @@ class TogetherAI_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the TogetherAI API."""
         response = self._client.chat.completions.create(
             model=model,
@@ -72,4 +72,4 @@ class TogetherAI_LLM(Base_LLM):
         # cast to Any first as together doesn't publicly export the message type,
         # then cast content to str as text completions always have it set
         message = cast(Any, response.choices[0].message)
-        return cast(str, message.content)
+        return cast(str, message.content), None

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_cgr/llm/generate.py RENAMED Viewed

@@ -1,5 +1,7 @@
 """API utilities for interfacing with the generation models."""
+from typing import cast
 from llm_cgr.defaults import DEFAULT_MODEL
 from llm_cgr.llm.clients import get_llm
 from llm_cgr.llm.prompts import BOOL_SYSTEM_PROMPT, LIST_SYSTEM_PROMPT
@@ -31,7 +33,8 @@ def generate(
         max_tokens=max_tokens,
         **generate_kwargs,
     )
-    return result
+    # enable_reasoning is False by default, so result is always a plain string
+    return cast(str, result)
 def generate_list(

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llm-codegen-research
-Version: 2.14
+Version: 2.15
 Summary: Useful classes and methods for researching code-generation by LLMs.
 Author-email: Lukas Twist <itsluketwist@gmail.com>
 Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research

{llm_codegen_research-2.14 → llm_codegen_research-2.15}/src/llm_codegen_research.egg-info/SOURCES.txt RENAMED Viewed

@@ -39,6 +39,7 @@ src/llm_codegen_research.egg-info/top_level.txt
 tests/test_enums.py
 tests/test_json_utils.py
 tests/test_llm_api.py
+tests/test_llm_deepseek_reasoning.py
 tests/test_llm_local.py
 tests/test_llm_tool.py
 tests/test_utils.py

llm_codegen_research-2.15/tests/test_llm_deepseek_reasoning.py ADDED Viewed

@@ -0,0 +1,136 @@
+"""Tests for DeepSeek reasoning model support."""
+import pytest
+from llm_cgr.llm.clients.deepseek import DeepSeek_LLM
+# mark all tests in this file as api tests, so they can be excluded in ci
+pytestmark = pytest.mark.api
+# standard model returns no chain-of-thought; reasoner model does
+CHAT_MODEL = "deepseek-chat"
+REASONER_MODEL = "deepseek-reasoner"
+USER_PROMPT = "How many r's are in 'strawberry'?"
+def test_generate_no_reasoning():
+    """
+    Test that generate returns plain strings when enable_reasoning is False (default).
+    """
+    llm = DeepSeek_LLM(model=CHAT_MODEL)
+    results = llm.generate(user=USER_PROMPT)
+    assert isinstance(results, list)
+    assert len(results) == 1
+    # result should be a plain string, not a tuple
+    assert isinstance(results[0], str)
+    assert len(results[0]) > 0
+def test_generate_with_reasoning_returns_tuples():
+    """
+    Test that generate returns (response, reasoning) tuples when enable_reasoning is True.
+    """
+    llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
+    results = llm.generate(user=USER_PROMPT)
+    assert isinstance(results, list)
+    assert len(results) == 1
+    response, reasoning = results[0]
+    # response should be a non-empty string
+    assert isinstance(response, str)
+    assert len(response) > 0
+    # the reasoner model should always produce chain-of-thought
+    assert isinstance(reasoning, str)
+    assert len(reasoning) > 0
+def test_generate_non_reasoning_model_has_no_reasoning():
+    """
+    Test that a standard (non-reasoner) model returns None for reasoning even when enabled.
+    """
+    llm = DeepSeek_LLM(model=CHAT_MODEL, enable_reasoning=True)
+    results = llm.generate(user=USER_PROMPT)
+    response, reasoning = results[0]
+    assert isinstance(response, str)
+    assert len(response) > 0
+    # deepseek-chat does not produce reasoning content
+    assert reasoning is None
+def test_chat_no_reasoning():
+    """
+    Test that chat returns a plain string and history has no reasoning_content
+    when enable_reasoning is False (default).
+    """
+    llm = DeepSeek_LLM(model=CHAT_MODEL)
+    response = llm.chat(user=USER_PROMPT)
+    assert isinstance(response, str)
+    assert len(response) > 0
+    # history entries should each have exactly role and content
+    history = llm.history
+    assert all("reasoning_content" not in msg for msg in history)
+def test_chat_with_reasoning_returns_tuple():
+    """
+    Test that chat returns a (response, reasoning) tuple when enable_reasoning is True.
+    """
+    llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
+    result = llm.chat(user=USER_PROMPT)
+    assert isinstance(result, tuple)
+    response, reasoning = result
+    assert isinstance(response, str)
+    assert len(response) > 0
+    assert isinstance(reasoning, str)
+    assert len(reasoning) > 0
+def test_chat_reasoning_stored_in_history():
+    """
+    Test that reasoning is stored on the assistant history entry when enable_reasoning is True.
+    """
+    llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
+    llm.chat(user=USER_PROMPT)
+    history = llm.history
+    # find the assistant message
+    assistant_msgs = [msg for msg in history if msg["role"] == "assistant"]
+    assert len(assistant_msgs) == 1
+    assistant_msg = assistant_msgs[0]
+    assert "reasoning_content" in assistant_msg
+    assert isinstance(assistant_msg["reasoning_content"], str)
+    assert len(assistant_msg["reasoning_content"]) > 0
+def test_chat_multi_turn_reasoning_stored_per_turn():
+    """
+    Test that reasoning is captured and stored for each turn in a multi-turn chat.
+    """
+    llm = DeepSeek_LLM(model=REASONER_MODEL, enable_reasoning=True)
+    llm.chat(user="What is 2 + 2?")
+    llm.chat(user="And what is that result multiplied by 3?")
+    history = llm.history
+    assistant_msgs = [msg for msg in history if msg["role"] == "assistant"]
+    assert len(assistant_msgs) == 2
+    # both assistant turns should have reasoning attached
+    for msg in assistant_msgs:
+        assert "reasoning_content" in msg
+        assert isinstance(msg["reasoning_content"], str)
+        assert len(msg["reasoning_content"]) > 0