fabricatio 0.2.5.dev4-cp312-cp312-win_amd64.whl → 0.2.5.dev5-cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/_rust.cp312-win_amd64.pyd +0 -0
- fabricatio/actions/rag.py +1 -1
- fabricatio/capabilities/propose.py +14 -20
- fabricatio/capabilities/rating.py +41 -36
- fabricatio/capabilities/review.py +8 -9
- fabricatio/capabilities/task.py +7 -8
- fabricatio/config.py +8 -4
- fabricatio/fs/readers.py +1 -1
- fabricatio/journal.py +1 -0
- fabricatio/models/action.py +1 -1
- fabricatio/models/events.py +6 -4
- fabricatio/models/extra.py +19 -16
- fabricatio/models/generic.py +14 -1
- fabricatio/models/kwargs_types.py +70 -72
- fabricatio/models/tool.py +4 -4
- fabricatio/models/usages.py +67 -68
- fabricatio/parser.py +26 -5
- {fabricatio-0.2.5.dev4.data → fabricatio-0.2.5.dev5.data}/scripts/tdown.exe +0 -0
- {fabricatio-0.2.5.dev4.dist-info → fabricatio-0.2.5.dev5.dist-info}/METADATA +2 -1
- fabricatio-0.2.5.dev5.dist-info/RECORD +41 -0
- fabricatio-0.2.5.dev4.dist-info/RECORD +0 -41
- {fabricatio-0.2.5.dev4.dist-info → fabricatio-0.2.5.dev5.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.5.dev4.dist-info → fabricatio-0.2.5.dev5.dist-info}/licenses/LICENSE +0 -0
fabricatio/models/kwargs_types.py
CHANGED
@@ -1,92 +1,90 @@
 """This module contains the types for the keyword arguments of the methods in the models module."""

-from typing import Any,
+from typing import Any, TypedDict

 from litellm.caching.caching import CacheMode
 from litellm.types.caching import CachingSupportedCallTypes
-from pydantic import NonNegativeFloat, NonNegativeInt, PositiveInt


-class CollectionSimpleConfigKwargs(TypedDict):
+class CollectionSimpleConfigKwargs(TypedDict, total=False):
     """Configuration parameters for a vector collection.

     These arguments are typically used when configuring connections to vector databases.
     """

-    dimension:
-    timeout:
+    dimension: int
+    timeout: float


-class FetchKwargs(TypedDict):
+class FetchKwargs(TypedDict, total=False):
     """Arguments for fetching data from vector collections.

     Controls how data is retrieved from vector databases, including filtering
     and result limiting parameters.
     """

-    collection_name:
-    similarity_threshold:
-    result_per_query:
+    collection_name: str
+    similarity_threshold: float
+    result_per_query: int


-class EmbeddingKwargs(TypedDict):
+class EmbeddingKwargs(TypedDict, total=False):
     """Configuration parameters for text embedding operations.

     These settings control the behavior of embedding models that convert text
     to vector representations.
     """

-    model:
-    dimensions:
-    timeout:
-    caching:
+    model: str
+    dimensions: int
+    timeout: int
+    caching: bool


-class LLMKwargs(TypedDict):
+class LLMKwargs(TypedDict, total=False):
     """Configuration parameters for language model inference.

     These arguments control the behavior of large language model calls,
     including generation parameters and caching options.
     """

-    model:
-    temperature:
-    stop:
-    top_p:
-    max_tokens:
-    stream:
-    timeout:
-    max_retries:
-    no_cache:
-    no_store:
-    cache_ttl:
-    s_maxage:
-
-
-class
-    """Arguments for content
+    model: str
+    temperature: float
+    stop: str | list[str]
+    top_p: float
+    max_tokens: int
+    stream: bool
+    timeout: int
+    max_retries: int
+    no_cache: bool # if the req uses cache in this call
+    no_store: bool # If store the response of this call to cache
+    cache_ttl: int # how long the stored cache is alive, in seconds
+    s_maxage: int # max accepted age of cached response, in seconds
+
+
+class GenerateKwargs(LLMKwargs, total=False):
+    """Arguments for content generation operations.

-    Extends LLMKwargs with additional parameters specific to
-    such as
+    Extends LLMKwargs with additional parameters specific to generation tasks,
+    such as the number of generated items and the system message.
     """

-
-    max_validations: NotRequired[PositiveInt]
+    system_message: str


-
-
-    """Arguments for content generation operations.
+class ValidateKwargs[T](GenerateKwargs, total=False):
+    """Arguments for content validation operations.

-    Extends
-
+    Extends LLMKwargs with additional parameters specific to validation tasks,
+    such as limiting the number of validation attempts.
     """

-
+    default: T
+    max_validations: int


 # noinspection PyTypedDict
-class ReviewKwargs[T](
+class ReviewKwargs[T](ValidateKwargs[T], total=False):
     """Arguments for content review operations.

     Extends GenerateKwargs with parameters for evaluating content against
@@ -94,18 +92,18 @@ class ReviewKwargs[T](GenerateKwargs[T]):
     """

     topic: str
-    criteria:
+    criteria: set[str]


 # noinspection PyTypedDict
-class ChooseKwargs[T](
+class ChooseKwargs[T](ValidateKwargs[T], total=False):
     """Arguments for selection operations.

     Extends GenerateKwargs with parameters for selecting among options,
     such as the number of items to choose.
     """

-    k:
+    k: int


 class CacheKwargs(TypedDict, total=False):
@@ -115,35 +113,35 @@ class CacheKwargs(TypedDict, total=False):
     including in-memory, Redis, S3, and vector database caching options.
     """

-    mode:
-    host:
-    port:
-    password:
-    namespace:
-    ttl:
-    default_in_memory_ttl:
-    default_in_redis_ttl:
-    similarity_threshold:
-    supported_call_types:
+    mode: CacheMode # when default_on cache is always on, when default_off cache is opt in
+    host: str
+    port: str
+    password: str
+    namespace: str
+    ttl: float
+    default_in_memory_ttl: float
+    default_in_redis_ttl: float
+    similarity_threshold: float
+    supported_call_types: list[CachingSupportedCallTypes]
     # s3 Bucket, boto3 configuration
-    s3_bucket_name:
-    s3_region_name:
-    s3_api_version:
-    s3_use_ssl:
-    s3_verify:
-    s3_endpoint_url:
-    s3_aws_access_key_id:
-    s3_aws_secret_access_key:
-    s3_aws_session_token:
-    s3_config:
-    s3_path:
+    s3_bucket_name: str
+    s3_region_name: str
+    s3_api_version: str
+    s3_use_ssl: bool
+    s3_verify: bool | str
+    s3_endpoint_url: str
+    s3_aws_access_key_id: str
+    s3_aws_secret_access_key: str
+    s3_aws_session_token: str
+    s3_config: Any
+    s3_path: str
     redis_semantic_cache_use_async: bool
     redis_semantic_cache_embedding_model: str
-    redis_flush_size:
-    redis_startup_nodes:
+    redis_flush_size: int
+    redis_startup_nodes: list
     disk_cache_dir: Any
-    qdrant_api_base:
-    qdrant_api_key:
-    qdrant_collection_name:
-    qdrant_quantization_config:
+    qdrant_api_base: str
+    qdrant_api_key: str
+    qdrant_collection_name: str
+    qdrant_quantization_config: str
     qdrant_semantic_cache_embedding_model: str
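The recurring change in this file is the switch to `TypedDict(..., total=False)`, which makes every declared key optional so these classes can be consumed through `typing.Unpack` as partial keyword bundles. A minimal sketch of that consumption pattern (the trimmed `LLMKwargs` body and the `ask` function below are illustrative stand-ins, not the package's code):

from typing import TypedDict, Unpack


class LLMKwargs(TypedDict, total=False):
    # With total=False every key is optional; callers supply any subset.
    model: str
    temperature: float
    max_tokens: int


def ask(question: str, **kwargs: Unpack[LLMKwargs]) -> str:
    # Only the keys the caller actually passed are present in kwargs.
    model = kwargs.get("model", "default-model")
    return f"[{model}] {question}"


print(ask("ping"))                    # no kwargs at all
print(ask("ping", temperature=0.2))   # a partial subset type-checks cleanly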
fabricatio/models/tool.py
CHANGED
@@ -4,7 +4,7 @@ from importlib.machinery import ModuleSpec
 from importlib.util import module_from_spec
 from inspect import iscoroutinefunction, signature
 from types import CodeType, ModuleType
-from typing import Any, Callable, Dict, List, Optional, Self, overload
+from typing import Any, Callable, Dict, List, Optional, Self, cast, overload

 from fabricatio.config import configs
 from fabricatio.decorators import logging_execution_info, use_temp_module
@@ -136,7 +136,7 @@ class ToolExecutor(BaseModel):

     def inject_tools[M: ModuleType](self, module: Optional[M] = None) -> M:
         """Inject the tools into the provided module or default."""
-        module = module or module_from_spec(spec=ModuleSpec(name=configs.toolbox.tool_module_name, loader=None))
+        module = module or cast(M, module_from_spec(spec=ModuleSpec(name=configs.toolbox.tool_module_name, loader=None)))
         for tool in self.candidates:
             logger.debug(f"Injecting tool: {tool.name}")
             setattr(module, tool.name, tool.invoke)
@@ -144,7 +144,7 @@ class ToolExecutor(BaseModel):

     def inject_data[M: ModuleType](self, module: Optional[M] = None) -> M:
         """Inject the data into the provided module or default."""
-        module = module or module_from_spec(spec=ModuleSpec(name=configs.toolbox.data_module_name, loader=None))
+        module = module or cast(M,module_from_spec(spec=ModuleSpec(name=configs.toolbox.data_module_name, loader=None)))
         for key, value in self.data.items():
             logger.debug(f"Injecting data: {key}")
             setattr(module, key, value)
@@ -184,6 +184,6 @@ class ToolExecutor(BaseModel):
         tools = []
         while tool_name := recipe.pop(0):
             for toolbox in toolboxes:
-                tools.append(toolbox
+                tools.append(toolbox.get(tool_name))

         return cls(candidates=tools)
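The new `cast(M, ...)` calls address a typing mismatch rather than runtime behavior: `module_from_spec` is annotated as returning a plain `ModuleType`, while `inject_tools`/`inject_data` are generic over `M` and promise to return `M`. A hedged sketch of the same pattern in isolation (the `ensure_module` helper and the module name "scratch" are illustrative, not part of the package):

from importlib.machinery import ModuleSpec
from importlib.util import module_from_spec
from types import ModuleType
from typing import Optional, cast


def ensure_module[M: ModuleType](module: Optional[M] = None) -> M:
    # module_from_spec() is typed as ModuleType, not M, so the fallback value
    # is cast to satisfy the declared return type; runtime behavior is unchanged.
    return module or cast(M, module_from_spec(ModuleSpec(name="scratch", loader=None)))


scratch = ensure_module()
scratch.answer = 42  # attributes can be injected onto the fresh module
print(scratch.answer)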
fabricatio/models/usages.py
CHANGED
@@ -1,7 +1,7 @@
 """This module contains classes that manage the usage of language models and tools in tasks."""

 from asyncio import gather
-from typing import Callable, Dict, Iterable, List, Optional, Self, Set, Type, Union, Unpack, overload
+from typing import Callable, Dict, Iterable, List, Optional, Self, Sequence, Set, Type, Union, Unpack, overload

 import asyncstdlib
 import litellm
@@ -9,7 +9,7 @@ from fabricatio._rust_instances import template_manager
 from fabricatio.config import configs
 from fabricatio.journal import logger
 from fabricatio.models.generic import ScopedConfig, WithBriefing
-from fabricatio.models.kwargs_types import ChooseKwargs, EmbeddingKwargs, GenerateKwargs, LLMKwargs
+from fabricatio.models.kwargs_types import ChooseKwargs, EmbeddingKwargs, GenerateKwargs, LLMKwargs, ValidateKwargs
 from fabricatio.models.task import Task
 from fabricatio.models.tool import Tool, ToolBox
 from fabricatio.models.utils import Messages
@@ -20,12 +20,13 @@ from litellm.types.utils import (
     EmbeddingResponse,
     ModelResponse,
     StreamingChoices,
+    TextChoices,
 )
 from litellm.utils import CustomStreamWrapper
 from more_itertools import duplicates_everseen
 from pydantic import Field, NonNegativeInt, PositiveInt

-if configs.cache.enabled:
+if configs.cache.enabled and configs.cache.type:
     litellm.enable_cache(type=configs.cache.type, **configs.cache.params)
     logger.success(f"{configs.cache.type.name} Cache enabled")

@@ -42,7 +43,7 @@ class LLMUsage(ScopedConfig):
         messages: List[Dict[str, str]],
         n: PositiveInt | None = None,
         **kwargs: Unpack[LLMKwargs],
-    ) -> ModelResponse
+    ) -> ModelResponse:
         """Asynchronously queries the language model to generate a response based on the provided messages and parameters.

         Args:
@@ -81,7 +82,7 @@ class LLMUsage(ScopedConfig):
         system_message: str = "",
         n: PositiveInt | None = None,
         **kwargs: Unpack[LLMKwargs],
-    ) ->
+    ) -> Sequence[TextChoices | Choices | StreamingChoices]:
         """Asynchronously invokes the language model with a question and optional system message.

         Args:
@@ -101,13 +102,14 @@ class LLMUsage(ScopedConfig):
         if isinstance(resp, ModelResponse):
             return resp.choices
         if isinstance(resp, CustomStreamWrapper):
-            if not configs.debug.streaming_visible:
-                return
+            if not configs.debug.streaming_visible and (pack := stream_chunk_builder(await asyncstdlib.list())):
+                return pack.choices
             chunks = []
             async for chunk in resp:
                 chunks.append(chunk)
                 print(chunk.choices[0].delta.content or "", end="") # noqa: T201
-
+            if pack := stream_chunk_builder(chunks):
+                return pack.choices
         logger.critical(err := f"Unexpected response type: {type(resp)}")
         raise ValueError(err)

@@ -166,15 +168,15 @@ class LLMUsage(ScopedConfig):
                         for q, sm in zip(q_seq, sm_seq, strict=True)
                     ]
                 )
-                return [r.
+                return [r[0].message.content for r in res]
             case (list(q_seq), str(sm)):
                 res = await gather(*[self.ainvoke(n=1, question=q, system_message=sm, **kwargs) for q in q_seq])
-                return [r.
+                return [r[0].message.content for r in res]
             case (str(q), list(sm_seq)):
                 res = await gather(*[self.ainvoke(n=1, question=q, system_message=sm, **kwargs) for sm in sm_seq])
-                return [r.
+                return [r[0].message.content for r in res]
             case (str(q), str(sm)):
-                return ((await self.ainvoke(n=1, question=q, system_message=sm, **kwargs))
+                return ((await self.ainvoke(n=1, question=q, system_message=sm, **kwargs))[0]).message.content
             case _:
                 raise RuntimeError("Should not reach here.")

@@ -185,8 +187,7 @@ class LLMUsage(ScopedConfig):
         validator: Callable[[str], T | None],
         default: T,
         max_validations: PositiveInt = 2,
-
-        **kwargs: Unpack[LLMKwargs],
+        **kwargs: Unpack[GenerateKwargs],
     ) -> T: ...
     @overload
     async def aask_validate[T](
@@ -195,19 +196,36 @@ class LLMUsage(ScopedConfig):
         validator: Callable[[str], T | None],
         default: None = None,
         max_validations: PositiveInt = 2,
-
-        **kwargs: Unpack[LLMKwargs],
+        **kwargs: Unpack[GenerateKwargs],
     ) -> Optional[T]: ...

+    @overload
     async def aask_validate[T](
         self,
-        question: str,
+        question: List[str],
+        validator: Callable[[str], T | None],
+        default: None = None,
+        max_validations: PositiveInt = 2,
+        **kwargs: Unpack[GenerateKwargs],
+    ) -> List[Optional[T]]: ...
+    @overload
+    async def aask_validate[T](
+        self,
+        question: List[str],
+        validator: Callable[[str], T | None],
+        default: T,
+        max_validations: PositiveInt = 2,
+        **kwargs: Unpack[GenerateKwargs],
+    ) -> List[T]: ...
+
+    async def aask_validate[T](
+        self,
+        question: str | List[str],
         validator: Callable[[str], T | None],
         default: Optional[T] = None,
         max_validations: PositiveInt = 2,
-
-
-    ) -> Optional[T]:
+        **kwargs: Unpack[GenerateKwargs],
+    ) -> Optional[T] | List[Optional[T]] | List[T] | T:
         """Asynchronously asks a question and validates the response using a given validator.

         Args:
@@ -215,59 +233,42 @@ class LLMUsage(ScopedConfig):
             validator (Callable[[str], T | None]): A function to validate the response.
             default (T | None): Default value to return if validation fails. Defaults to None.
             max_validations (PositiveInt): Maximum number of validation attempts. Defaults to 2.
-            system_message (str): System message to include in the request. Defaults to an empty string.
             **kwargs (Unpack[LLMKwargs]): Additional keyword arguments for the LLM usage.

         Returns:
             T: The validated response.

         """
-        for i in range(max_validations):
-            if (
-                response := await self.aask(
-                    question=question,
-                    system_message=system_message,
-                    **kwargs,
-                )
-            ) and (validated := validator(response)):
-                logger.debug(f"Successfully validated the response at {i}th attempt.")
-                return validated
-            kwargs["no_cache"] = True
-            logger.debug("Closed the cache for the next attempt")
-        if default is None:
-            logger.error(f"Failed to validate the response after {max_validations} attempts.")
-        return default
-
-    async def aask_validate_batch[T](
-        self,
-        questions: List[str],
-        validator: Callable[[str], T | None],
-        **kwargs: Unpack[GenerateKwargs[T]],
-    ) -> List[T]:
-        """Asynchronously asks a batch of questions and validates the responses using a given validator.

-
-
-
-
-
-
-
-
-
-
-
-
+        async def _inner(q: str) -> Optional[T]:
+            for lap in range(max_validations):
+                try:
+                    if (response := await self.aask(question=q, **kwargs)) and (validated := validator(response)):
+                        logger.debug(f"Successfully validated the response at {lap}th attempt.")
+                        return validated
+                except Exception as e: # noqa: BLE001
+                    logger.error(f"Error during validation: \n{e}")
+                    break
+                kwargs["no_cache"] = True
+                logger.debug("Closed the cache for the next attempt")
+            if default is None:
+                logger.error(f"Failed to validate the response after {max_validations} attempts.")
+            return default
+
+        if isinstance(question, str):
+            return await _inner(question)
+
+        return await gather(*[_inner(q) for q in question])

     async def aliststr(
-        self, requirement: str, k: NonNegativeInt = 0, **kwargs: Unpack[
+        self, requirement: str, k: NonNegativeInt = 0, **kwargs: Unpack[ValidateKwargs[List[str]]]
     ) -> List[str]:
         """Asynchronously generates a list of strings based on a given requirement.

         Args:
             requirement (str): The requirement for the list of strings.
             k (NonNegativeInt): The number of choices to select, 0 means infinite. Defaults to 0.
-            **kwargs (Unpack[
+            **kwargs (Unpack[ValidateKwargs]): Additional keyword arguments for the LLM usage.

         Returns:
             List[str]: The validated response as a list of strings.
@@ -299,12 +300,12 @@ class LLMUsage(ScopedConfig):
             **kwargs,
         )

-    async def awhich_pathstr(self, requirement: str, **kwargs: Unpack[
+    async def awhich_pathstr(self, requirement: str, **kwargs: Unpack[ValidateKwargs[List[str]]]) -> str:
         """Asynchronously generates a single path string based on a given requirement.

         Args:
             requirement (str): The requirement for the list of strings.
-            **kwargs (Unpack[
+            **kwargs (Unpack[ValidateKwargs]): Additional keyword arguments for the LLM usage.

         Returns:
             str: The validated response as a single string.
@@ -322,7 +323,7 @@ class LLMUsage(ScopedConfig):
         instruction: str,
         choices: List[T],
         k: NonNegativeInt = 0,
-        **kwargs: Unpack[
+        **kwargs: Unpack[ValidateKwargs[List[T]]],
     ) -> List[T]:
         """Asynchronously executes a multi-choice decision-making process, generating a prompt based on the instruction and options, and validates the returned selection results.

@@ -330,7 +331,7 @@ class LLMUsage(ScopedConfig):
             instruction (str): The user-provided instruction/question description.
             choices (List[T]): A list of candidate options, requiring elements to have `name` and `briefing` fields.
             k (NonNegativeInt): The number of choices to select, 0 means infinite. Defaults to 0.
-            **kwargs (Unpack[
+            **kwargs (Unpack[ValidateKwargs]): Additional keyword arguments for the LLM usage.

         Returns:
             List[T]: The final validated selection result list, with element types matching the input `choices`.
@@ -373,14 +374,14 @@ class LLMUsage(ScopedConfig):
         self,
         instruction: str,
         choices: List[T],
-        **kwargs: Unpack[
+        **kwargs: Unpack[ValidateKwargs[List[T]]],
     ) -> T:
         """Asynchronously picks a single choice from a list of options using AI validation.

         Args:
             instruction (str): The user-provided instruction/question description.
             choices (List[T]): A list of candidate options, requiring elements to have `name` and `briefing` fields.
-            **kwargs (Unpack[
+            **kwargs (Unpack[ValidateKwargs]): Additional keyword arguments for the LLM usage.

         Returns:
             T: The single selected item from the choices list.
@@ -402,7 +403,7 @@ class LLMUsage(ScopedConfig):
         prompt: str,
         affirm_case: str = "",
         deny_case: str = "",
-        **kwargs: Unpack[
+        **kwargs: Unpack[ValidateKwargs[bool]],
     ) -> bool:
         """Asynchronously judges a prompt using AI validation.

@@ -410,7 +411,7 @@ class LLMUsage(ScopedConfig):
             prompt (str): The input prompt to be judged.
             affirm_case (str): The affirmative case for the AI model. Defaults to an empty string.
             deny_case (str): The negative case for the AI model. Defaults to an empty string.
-            **kwargs (Unpack[
+            **kwargs (Unpack[ValidateKwargs]): Additional keyword arguments for the LLM usage.

         Returns:
             bool: The judgment result (True or False) based on the AI's response.
@@ -516,7 +517,6 @@ class ToolBoxUsage(LLMUsage):
     async def choose_toolboxes(
         self,
         task: Task,
-        system_message: str = "",
         **kwargs: Unpack[ChooseKwargs[List[ToolBox]]],
     ) -> List[ToolBox]:
         """Asynchronously executes a multi-choice decision-making process to choose toolboxes.
@@ -535,7 +535,6 @@ class ToolBoxUsage(LLMUsage):
         return await self.achoose(
             instruction=task.briefing,
             choices=list(self.toolboxes),
-            system_message=system_message,
             **kwargs,
         )

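The net effect on `aask_validate` is that the old `aask_validate_batch` is folded in: the same method now takes either a single question or a list of questions, retries up to `max_validations` times, and runs list inputs concurrently through `asyncio.gather`. A hedged usage sketch of the new call shape (the validator and the `usage` object are placeholders; only the signatures are taken from the diff):

def as_positive_int(raw: str) -> int | None:
    """Illustrative validator: accept the reply only if it is a positive integer."""
    try:
        value = int(raw.strip())
    except ValueError:
        return None
    return value if value > 0 else None


# Single question -> one validated value (or `default`, which falls back to None):
#     count = await usage.aask_validate("Reply with one number.", as_positive_int)
# List of questions -> a list of validated values, gathered concurrently:
#     counts = await usage.aask_validate(["Q1", "Q2"], as_positive_int, default=0)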
fabricatio/parser.py
CHANGED
@@ -1,12 +1,14 @@
 """A module to parse text using regular expressions."""

-from typing import Any, Callable, Optional, Self, Tuple, Type
+from typing import Any, Callable, Iterable, List, Optional, Self, Tuple, Type

 import orjson
 import regex
+from json_repair import repair_json
 from pydantic import BaseModel, ConfigDict, Field, PositiveInt, PrivateAttr, ValidationError
 from regex import Pattern, compile

+from fabricatio.config import configs
 from fabricatio.journal import logger


@@ -25,12 +27,31 @@ class Capture(BaseModel):
     """The regular expression pattern to search for."""
     flags: PositiveInt = Field(default=regex.DOTALL | regex.MULTILINE | regex.IGNORECASE, frozen=True)
     """The flags to use when compiling the regular expression pattern."""
+    capture_type: Optional[str] = None
+    """The type of capture to perform, e.g., 'json', which is used to dispatch the fixer accordingly."""
     _compiled: Pattern = PrivateAttr()

     def model_post_init(self, __context: Any) -> None:
         """Initialize the compiled pattern."""
         self._compiled = compile(self.pattern, self.flags)

+    def fix[T](self, text: str | Iterable[str]|T) -> str | List[str]|T:
+        """Fix the text using the pattern.
+
+        Args:
+            text (str | List[str]): The text to fix.
+
+        Returns:
+            str | List[str]: The fixed text with the same type as input.
+        """
+        match self.capture_type:
+            case "json":
+                if isinstance(text, str):
+                    return repair_json(text,ensure_ascii=False)
+                return [repair_json(item) for item in text]
+            case _:
+                return text
+
     def capture(self, text: str) -> Tuple[str, ...] | str | None:
         """Capture the first occurrence of the pattern in the given text.

@@ -44,12 +65,12 @@ class Capture(BaseModel):
         match = self._compiled.search(text)
         if match is None:
             return None
-
+        groups = self.fix(match.groups()) if configs.general.use_json_repair else match.groups()
         if self.target_groups:
-            cap = tuple(
+            cap = tuple(groups[g - 1] for g in self.target_groups)
             logger.debug(f"Captured text: {'\n\n'.join(cap)}")
             return cap
-        cap =
+        cap = groups[0]
         logger.debug(f"Captured text: \n{cap}")
         return cap

@@ -111,7 +132,7 @@ class Capture(BaseModel):
         Returns:
             Self: The instance of the class with the captured code block.
         """
-        return cls(pattern=f"```{language}\n(.*?)\n```")
+        return cls(pattern=f"```{language}\n(.*?)\n```", capture_type=language)


 JsonCapture = Capture.capture_code_block("json")
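The parser change wires `json_repair` into `Capture`: code blocks created with `capture_code_block("json")` now carry `capture_type="json"`, so captured groups can be passed through `repair_json` before downstream parsing (gated by the `use_json_repair` config flag referenced in the diff). A small sketch of the repair step on its own (assumes the `json-repair` dependency added in METADATA below; the sample string is illustrative):

from json_repair import repair_json

# A model reply that is almost, but not quite, valid JSON (trailing comma).
broken = '{"name": "fabricatio", "ok": true,}'

# repair_json returns a corrected JSON string; ensure_ascii=False mirrors the
# keyword used in Capture.fix in the diff above.
print(repair_json(broken, ensure_ascii=False))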
Binary file

{fabricatio-0.2.5.dev4.dist-info → fabricatio-0.2.5.dev5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fabricatio
-Version: 0.2.5.dev4
+Version: 0.2.5.dev5
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: 3.12
@@ -11,6 +11,7 @@ Classifier: Typing :: Typed
 Requires-Dist: appdirs>=1.4.4
 Requires-Dist: asyncio>=3.4.3
 Requires-Dist: asyncstdlib>=3.13.0
+Requires-Dist: json-repair>=0.39.1
 Requires-Dist: litellm>=1.60.0
 Requires-Dist: loguru>=0.7.3
 Requires-Dist: magika>=0.5.1