PyPI - llm-codegen-research - Versions diffs - 2.14.tar.gz → 2.16.tar.gz - Mend

llm-codegen-research 2.14.tar.gz → 2.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llm-codegen-research
-Version: 2.14
+Version: 2.16
 Summary: Useful classes and methods for researching code-generation by LLMs.
 Author-email: Lukas Twist <itsluketwist@gmail.com>
 Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -158,7 +158,7 @@ uv add openai
 Or to upgrade dependencies:
 ```shell
-uv sync --upgrade
+uv sync --extra api --upgrade
 ```
 Check typings with `ty`:

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/README.md RENAMED Viewed

@@ -138,7 +138,7 @@ uv add openai
 Or to upgrade dependencies:
 ```shell
-uv sync --upgrade
+uv sync --extra api --upgrade
 ```
 Check typings with `ty`:

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/defaults.py RENAMED Viewed

@@ -7,4 +7,7 @@ DEFAULT_MODEL = "gpt-4.1-mini-2025-04-14"
 DEFAULT_CODEBLOCK_LANGUAGE = "python"
 # the default max_tokens to be used when prompting models
-DEFAULT_MAX_TOKENS = 2000
+DEFAULT_MAX_TOKENS = 4096
+# default token budget for anthropic extended thinking (minimum allowed is 1024)
+DEFAULT_THINKING_BUDGET = 2048

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/__init__.py RENAMED Viewed

@@ -33,6 +33,7 @@ def get_llm(
     top_p: float | None = None,
     max_tokens: int | None = None,
     provider: str | None = None,
+    enable_reasoning: bool = False,
     tools: list[Tool] | None = None,
     max_tool_iterations: int = MAX_TOOL_ITERATIONS,
     max_tool_calls: int = MAX_TOOL_CALLS,
@@ -41,7 +42,8 @@ def get_llm(
     Initialise the correct LLM client for the given model.
     If tools are provided, returns an OpenAI_Tool_LLM instance. Tool calls
-    are currently only supported for OpenAI models.
+    are currently only supported for OpenAI models. enable_reasoning is only
+    supported by Anthropic, DeepSeek, Mistral, and TogetherAI models.
     """
     llm_class: type[Base_LLM]
     if provider is not None:
@@ -63,6 +65,8 @@ def get_llm(
             raise NotImplementedError(
                 "Tool calls are only supported for OpenAI models."
             )
+        if enable_reasoning:
+            raise ValueError("OpenAI_Tool_LLM does not support enable_reasoning.")
         return OpenAI_Tool_LLM(
             tools=tools,
             model=model,
@@ -80,6 +84,7 @@ def get_llm(
         temperature=temperature,
         top_p=top_p,
         max_tokens=max_tokens,
+        enable_reasoning=enable_reasoning,
     )

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/anthropic.py RENAMED Viewed

@@ -3,9 +3,14 @@
 from typing import Any, cast
 import anthropic
-from anthropic.types import MessageParam, TextBlock
+from anthropic.types import (
+    MessageParam,
+    TextBlock,
+    ThinkingBlock,
+    ThinkingConfigEnabledParam,
+)
-from llm_cgr.defaults import DEFAULT_MAX_TOKENS
+from llm_cgr.defaults import DEFAULT_MAX_TOKENS, DEFAULT_THINKING_BUDGET
 from llm_cgr.llm.clients.base import Base_LLM
@@ -19,11 +24,14 @@ class Anthropic_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the Anthropic client.
         Requires the ANTHROPIC_API_KEY environment variable to be set.
+        Set enable_reasoning=True to enable extended thinking on supported models
+        (e.g. claude-sonnet-4-5).
         """
         super().__init__(
             model=model,
@@ -31,6 +39,7 @@ class Anthropic_LLM(Base_LLM):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
+            enable_reasoning=enable_reasoning,
         )
         self._client = anthropic.Anthropic()
@@ -66,15 +75,52 @@ class Anthropic_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the Anthropic API."""
+        # extended thinking is incompatible with custom temperature/top_p
+        thinking = (
+            ThinkingConfigEnabledParam(
+                type="enabled",
+                budget_tokens=DEFAULT_THINKING_BUDGET,
+            )
+            if self._enable_reasoning
+            else anthropic.omit
+        )
+        # custom temperature/top_p are not supported alongside extended thinking,
+        # and the api rejects requests that set both temperature and top_p
+        _temperature = (
+            temperature
+            if temperature is not None and not self._enable_reasoning
+            else anthropic.omit
+        )
+        _top_p = (
+            top_p
+            if top_p is not None
+            and not self._enable_reasoning
+            and _temperature is anthropic.omit
+            else anthropic.omit
+        )
         response = self._client.messages.create(
             model=model,
             system=system or self._system or anthropic.omit,
             messages=cast(list[MessageParam], input),
-            temperature=temperature if temperature is not None else anthropic.omit,
-            top_p=top_p if top_p is not None else anthropic.omit,
+            temperature=_temperature,
+            top_p=_top_p,
             max_tokens=max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS,
+            thinking=thinking,
+        )
+        # collect chain-of-thought from any thinking blocks; None if not present
+        thinking_blocks = [
+            block.thinking
+            for block in response.content
+            if isinstance(block, ThinkingBlock)
+        ]
+        reasoning = "\n".join(thinking_blocks) if thinking_blocks else None
+        # the final answer is always returned as a text block
+        text_block = next(
+            block for block in response.content if isinstance(block, TextBlock)
         )
-        # cast to TextBlock as non-tool, non-thinking requests always return text
-        return cast(TextBlock, response.content[0]).text
+        return text_block.text, reasoning

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/base.py RENAMED Viewed

@@ -1,3 +1,5 @@
+"""Base class for LLM API clients."""
 from abc import ABC, abstractmethod
 from typing import Any
@@ -12,9 +14,13 @@ class Base_LLM(ABC):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the LLM client.
+        When enable_reasoning is True, generate() and chat() include chain-of-thought
+        alongside responses, and reasoning is stored in the chat history.
         """
         self._model = model
         self._system = system
@@ -24,6 +30,7 @@ class Base_LLM(ABC):
         self._top_p = top_p
         self._max_tokens = max_tokens
+        self._enable_reasoning = enable_reasoning
         self._history: list[dict[str, Any]] | None = None
     def generate(
@@ -35,9 +42,12 @@ class Base_LLM(ABC):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> list[str]:
+    ) -> list[str] | list[tuple[str, str | None]]:
         """
         Generate model responses from the LLMs API.
+        When enable_reasoning is True, returns a list of (response, reasoning) tuples.
+        When False, returns a list of response strings.
         """
         _model = model or self._model
         if _model is None:
@@ -48,16 +58,19 @@ class Base_LLM(ABC):
             system=system or self._system,
         )
-        _generations = []
+        _generations: list[Any] = []
         for _ in range(samples):
-            response = self._get_response(
+            response, reasoning = self._get_response(
                 input=messages,
                 model=_model,
                 temperature=temperature or self._temperature,
                 top_p=top_p or self._top_p,
                 max_tokens=max_tokens or self._max_tokens,
             )
-            _generations.append(response)
+            if self._enable_reasoning:
+                _generations.append((response, reasoning))
+            else:
+                _generations.append(response)
         return _generations
@@ -69,9 +82,12 @@ class Base_LLM(ABC):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> str | tuple[str, str | None]:
         """
         Generate a model response from the LLMs API, in the ongoing chat.
+        When enable_reasoning is True, reasoning is stored in the history and the
+        return value is a (response, reasoning) tuple instead of a plain string.
         """
         _model = model or self._model
         if _model is None:
@@ -92,7 +108,7 @@ class Base_LLM(ABC):
                 )
             )
-        response = self._get_response(
+        response, reasoning = self._get_response(
             input=self._history,
             system=system,
             model=_model,
@@ -101,13 +117,14 @@ class Base_LLM(ABC):
             max_tokens=max_tokens or self._max_tokens,
         )
-        # update the history and return
-        self._history.append(
-            self._build_message(
-                role="assistant",
-                content=response,
-            )
-        )
+        # build the assistant history entry, attaching reasoning if present
+        assistant_message = self._build_message(role="assistant", content=response)
+        if self._enable_reasoning and reasoning is not None:
+            assistant_message["reasoning_content"] = reasoning
+        self._history.append(assistant_message)
+        if self._enable_reasoning:
+            return response, reasoning
         return response
     @property
@@ -146,9 +163,10 @@ class Base_LLM(ABC):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """
         Generate a model response from the LLM API.
-        Returns the text response to the prompt.
+        Returns a (response, reasoning) tuple; reasoning is None for models that
+        do not produce chain-of-thought output.
         """

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/deepseek.py RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Class to access LLMs via the OpenAI API."""
+"""Class to access LLMs via the DeepSeek API."""
 import os
 from typing import Any, cast
@@ -19,11 +19,13 @@ class DeepSeek_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the DeepSeek client.
         Requires the DEEPSEEK_API_KEY environment variable to be set.
+        Set enable_reasoning=True when using a reasoning model (e.g. deepseek-reasoner).
         """
         super().__init__(
             model=model,
@@ -31,6 +33,7 @@ class DeepSeek_LLM(Base_LLM):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
+            enable_reasoning=enable_reasoning,
         )
         self._client = openai.OpenAI(
             api_key=os.environ["DEEPSEEK_API_KEY"],
@@ -65,14 +68,21 @@ class DeepSeek_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
-        """Generate a model response from the OpenAI API."""
+    ) -> tuple[str, str | None]:
+        """Generate a model response from the DeepSeek API."""
         response = self._client.chat.completions.create(
             messages=cast(list[ChatCompletionMessageParam], input),
             model=model,
             temperature=temperature if temperature is not None else openai.omit,
             top_p=top_p if top_p is not None else openai.omit,
             max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
+            reasoning_effort="high",
+            extra_body={"thinking": {"type": "enabled"}},
         )
+        message = response.choices[0].message
+        # chain-of-thought from reasoning models (e.g. deepseek-reasoner); None otherwise
+        reasoning = getattr(message, "reasoning_content", None)
         # cast to str as text completions always return string content
-        return cast(str, response.choices[0].message.content)
+        return cast(str, message.content), reasoning

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/mistral.py RENAMED Viewed

@@ -4,6 +4,7 @@ import os
 from typing import Any
 from mistralai import client
+from mistralai.client.models import TextChunk, ThinkChunk
 from llm_cgr.llm.clients.base import Base_LLM
@@ -18,11 +19,14 @@ class Mistral_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the Mistral client.
         Requires the MISTRAL_API_KEY environment variable to be set.
+        Set enable_reasoning=True to request chain-of-thought from reasoning
+        models (e.g. magistral-medium-latest).
         """
         super().__init__(
             model=model,
@@ -30,6 +34,7 @@ class Mistral_LLM(Base_LLM):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
+            enable_reasoning=enable_reasoning,
         )
         self._client = client.Mistral(
             api_key=os.environ["MISTRAL_API_KEY"],
@@ -66,7 +71,7 @@ class Mistral_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the MistralAI API."""
         response = self._client.chat.complete(
             model=model,
@@ -74,5 +79,23 @@ class Mistral_LLM(Base_LLM):
             temperature=temperature if temperature is not None else client.UNSET,
             top_p=top_p,
             max_tokens=max_tokens if max_tokens is not None else client.UNSET,
+            reasoning_effort="high" if self._enable_reasoning else client.UNSET,
         )
-        return response.choices[0].message.content
+        content = response.choices[0].message.content
+        # plain string content means no reasoning chunks were returned
+        if isinstance(content, str):
+            return content, None
+        # otherwise content is a list of chunks: thinking and final text
+        reasoning_parts = [
+            inner.text
+            for chunk in content
+            if isinstance(chunk, ThinkChunk)
+            for inner in chunk.thinking
+            if isinstance(inner, TextChunk)
+        ]
+        text_parts = [chunk.text for chunk in content if isinstance(chunk, TextChunk)]
+        reasoning = "\n".join(reasoning_parts) if reasoning_parts else None
+        return "\n".join(text_parts), reasoning

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/nscale.py RENAMED Viewed

@@ -19,18 +19,23 @@ class Nscale_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the NSCALE client.
         Requires the NSCALE_API_KEY environment variable to be set.
         """
+        if enable_reasoning:
+            raise ValueError("Nscale_LLM does not support enable_reasoning.")
         super().__init__(
             model=model,
             system=system,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
+            enable_reasoning=enable_reasoning,
         )
         self._client = openai.OpenAI(
             api_key=os.environ["NSCALE_API_KEY"],
@@ -65,7 +70,7 @@ class Nscale_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the OpenAI API."""
         response = self._client.chat.completions.create(
             messages=cast(list[ChatCompletionMessageParam], input),
@@ -75,4 +80,4 @@ class Nscale_LLM(Base_LLM):
             max_completion_tokens=max_tokens if max_tokens is not None else openai.omit,
         )
         # cast to str as text completions always return string content
-        return cast(str, response.choices[0].message.content)
+        return cast(str, response.choices[0].message.content), None

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/openai.py RENAMED Viewed

@@ -18,18 +18,23 @@ class OpenAI_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the OpenAI client.
         Requires the OPENAI_API_KEY environment variable to be set.
         """
+        if enable_reasoning:
+            raise ValueError("OpenAI_LLM does not support enable_reasoning.")
         super().__init__(
             model=model,
             system=system,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
+            enable_reasoning=enable_reasoning,
         )
         self._client = openai.OpenAI()
@@ -61,7 +66,7 @@ class OpenAI_LLM(Base_LLM):
         temperature: int | float | None = None,
         top_p: int | float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the OpenAI API."""
         self._client.responses.input_items
         response = self._client.responses.create(
@@ -71,4 +76,4 @@ class OpenAI_LLM(Base_LLM):
             top_p=top_p if top_p is not None else openai.omit,
             max_output_tokens=max_tokens if max_tokens is not None else openai.omit,
         )
-        return response.output_text
+        return response.output_text, None

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/protocol.py RENAMED Viewed

@@ -17,7 +17,7 @@ class GenerationProtocol(Protocol):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> list[str]:
+    ) -> list[str] | list[tuple[str, str | None]]:
         """
         Generate model responses from the LLMs API.
         """
@@ -30,7 +30,7 @@ class GenerationProtocol(Protocol):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> str | tuple[str, str | None]:
         """
         Generate a model response from the LLMs API, in the ongoing chat.
         """

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/clients/together.py RENAMED Viewed

@@ -1,5 +1,6 @@
 """Class to access LLMs via the TogetherAI API."""
+import re
 from typing import Any, cast
 import together
@@ -7,6 +8,11 @@ import together
 from llm_cgr.llm.clients.base import Base_LLM
+# matches a <think>...</think> block at the start of a response, used by
+# models that embed their reasoning trace directly in the content
+_THINK_BLOCK = re.compile(r"\A<think>(.*?)</think>\s*", re.DOTALL)
 class TogetherAI_LLM(Base_LLM):
     """Class to access LLMs via the TogetherAI API."""
@@ -17,11 +23,13 @@ class TogetherAI_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
+        enable_reasoning: bool = False,
     ) -> None:
         """
         Initialise the TogetherAI client.
         Requires the TOGETHER_API_KEY environment variable to be set.
+        Set enable_reasoning=True when using a reasoning model (e.g. deepseek-ai/DeepSeek-R1).
         """
         super().__init__(
             model=model,
@@ -29,6 +37,7 @@ class TogetherAI_LLM(Base_LLM):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
+            enable_reasoning=enable_reasoning,
         )
         self._client = together.Together()
@@ -60,7 +69,7 @@ class TogetherAI_LLM(Base_LLM):
         temperature: float | None = None,
         top_p: float | None = None,
         max_tokens: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate a model response from the TogetherAI API."""
         response = self._client.chat.completions.create(
             model=model,
@@ -72,4 +81,20 @@ class TogetherAI_LLM(Base_LLM):
         # cast to Any first as together doesn't publicly export the message type,
         # then cast content to str as text completions always have it set
         message = cast(Any, response.choices[0].message)
-        return cast(str, message.content)
+        content = cast(str, message.content)
+        # chain-of-thought from reasoning models: most (e.g. DeepSeek-R1) use
+        # reasoning_content, some (e.g. Kimi-K2.6) use reasoning; None otherwise
+        reasoning = getattr(message, "reasoning_content", None) or getattr(
+            message, "reasoning", None
+        )
+        # some models embed their reasoning as a <think>...</think> block at
+        # the start of content instead of a separate field; pull it out
+        if reasoning is None:
+            think_match = _THINK_BLOCK.match(content)
+            if think_match:
+                reasoning = think_match.group(1).strip()
+                content = content[think_match.end() :]
+        return content, reasoning

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_cgr/llm/generate.py RENAMED Viewed

@@ -1,10 +1,13 @@
 """API utilities for interfacing with the generation models."""
+from typing import Literal, overload
 from llm_cgr.defaults import DEFAULT_MODEL
 from llm_cgr.llm.clients import get_llm
 from llm_cgr.llm.prompts import BOOL_SYSTEM_PROMPT, LIST_SYSTEM_PROMPT
+@overload
 def generate(
     user: str,
     model: str = DEFAULT_MODEL,
@@ -13,15 +16,47 @@ def generate(
     top_p: float | None = None,
     max_tokens: int | None = None,
     provider: str | None = None,
+    enable_reasoning: Literal[False] = False,
     **generate_kwargs,
-) -> str:
+) -> str: ...
+@overload
+def generate(
+    user: str,
+    model: str = DEFAULT_MODEL,
+    system: str | None = None,
+    temperature: float | None = None,
+    top_p: float | None = None,
+    max_tokens: int | None = None,
+    provider: str | None = None,
+    enable_reasoning: Literal[True] = True,
+    **generate_kwargs,
+) -> tuple[str, str | None]: ...
+def generate(
+    user: str,
+    model: str = DEFAULT_MODEL,
+    system: str | None = None,
+    temperature: float | None = None,
+    top_p: float | None = None,
+    max_tokens: int | None = None,
+    provider: str | None = None,
+    enable_reasoning: bool = False,
+    **generate_kwargs,
+) -> str | tuple[str, str | None]:
     """
     Simple function to quickly prompt a model for a response.
+    When enable_reasoning is True, returns a (response, reasoning) tuple instead
+    of a plain string.
     """
     client = get_llm(
         model=model,
         system=system,
         provider=provider,
+        enable_reasoning=enable_reasoning,
     )
     [result] = client.generate(
         user=user,

{llm_codegen_research-2.14 → llm_codegen_research-2.16}/src/llm_codegen_research.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llm-codegen-research
-Version: 2.14
+Version: 2.16
 Summary: Useful classes and methods for researching code-generation by LLMs.
 Author-email: Lukas Twist <itsluketwist@gmail.com>
 Project-URL: Homepage, https://github.com/itsluketwist/llm-codegen-research
@@ -158,7 +158,7 @@ uv add openai
 Or to upgrade dependencies:
 ```shell
-uv sync --upgrade
+uv sync --extra api --upgrade
 ```
 Check typings with `ty`: