fabricatio 0.2.9.dev4__cp312-cp312-win_amd64.whl → 0.2.10__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/actions/article.py +20 -106
- fabricatio/actions/article_rag.py +153 -22
- fabricatio/actions/fs.py +25 -0
- fabricatio/actions/output.py +17 -3
- fabricatio/actions/rag.py +40 -18
- fabricatio/actions/rules.py +14 -3
- fabricatio/capabilities/check.py +2 -1
- fabricatio/capabilities/rag.py +41 -231
- fabricatio/config.py +4 -2
- fabricatio/constants.py +20 -0
- fabricatio/decorators.py +23 -0
- fabricatio/models/adv_kwargs_types.py +35 -0
- fabricatio/models/events.py +6 -6
- fabricatio/models/extra/advanced_judge.py +2 -2
- fabricatio/models/extra/aricle_rag.py +170 -0
- fabricatio/models/extra/article_base.py +2 -186
- fabricatio/models/extra/article_essence.py +8 -7
- fabricatio/models/extra/article_main.py +39 -107
- fabricatio/models/extra/problem.py +12 -17
- fabricatio/models/extra/rag.py +98 -0
- fabricatio/models/extra/rule.py +1 -2
- fabricatio/models/generic.py +35 -12
- fabricatio/models/kwargs_types.py +8 -36
- fabricatio/models/task.py +3 -3
- fabricatio/models/usages.py +80 -6
- fabricatio/rust.cp312-win_amd64.pyd +0 -0
- fabricatio/rust.pyi +138 -6
- fabricatio/utils.py +62 -4
- fabricatio-0.2.10.data/scripts/tdown.exe +0 -0
- {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dist-info}/METADATA +1 -4
- fabricatio-0.2.10.dist-info/RECORD +64 -0
- fabricatio/models/utils.py +0 -148
- fabricatio-0.2.9.dev4.data/scripts/tdown.exe +0 -0
- fabricatio-0.2.9.dev4.dist-info/RECORD +0 -61
- {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dist-info}/licenses/LICENSE +0 -0
fabricatio/models/extra/problem.py
CHANGED
@@ -1,11 +1,12 @@
 """A class representing a problem-solution pair identified during a review process."""

 from itertools import chain
-from typing import Any, List,
+from typing import Any, List, Optional, Self, Tuple, Unpack

 from fabricatio.journal import logger
 from fabricatio.models.generic import SketchedAble, WithBriefing
 from fabricatio.utils import ask_edit
+from pydantic import Field
 from questionary import Choice, checkbox, text
 from rich import print as r_print

@@ -13,36 +14,30 @@ from rich import print as r_print

 class Problem(SketchedAble, WithBriefing):
     """Represents a problem identified during review."""

-    description: str
-    """
+    description: str = Field(alias="cause")
+    """The cause of the problem, including the root cause, the context, and the impact, make detailed enough for engineer to understand the problem and its impact."""

-
-    """Severity level of the problem."""
-
-    category: str
-    """Category of the problem."""
+    severity_level: int = Field(ge=0, le=10)
+    """Severity level of the problem, which is a number between 0 and 10, 0 means the problem is not severe, 10 means the problem is extremely severe."""

     location: str
     """Location where the problem was identified."""

-    recommendation: str
-    """Recommended solution or action."""
-

 class Solution(SketchedAble, WithBriefing):
     """Represents a proposed solution to a problem."""

-    description: str
+    description: str = Field(alias="mechanism")
     """Description of the solution, including a detailed description of the execution steps, and the mechanics, principle or fact."""

     execute_steps: List[str]
-    """A list of steps to execute to implement the solution, which is expected to be able to finally solve the corresponding problem."""
+    """A list of steps to execute to implement the solution, which is expected to be able to finally solve the corresponding problem, and which should be an Idiot-proof tutorial."""

-
-    """Feasibility level of the solution."""
+    feasibility_level: int = Field(ge=0, le=10)
+    """Feasibility level of the solution, which is a number between 0 and 10, 0 means the solution is not feasible, 10 means the solution is complete feasible."""

-
-    """Impact level of the solution."""
+    impact_level: int = Field(ge=0, le=10)
+    """Impact level of the solution, which is a number between 0 and 10, 0 means the solution is not impactful, 10 means the solution is extremely impactful."""


 class ProblemSolutions(SketchedAble):
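The net effect of this change is that Problem and Solution now take their descriptions through pydantic aliases ("cause" and "mechanism") and score their levels on a validated 0-10 scale. Below is a minimal sketch of how the new fields behave, with made-up values, assuming the usual pydantic v2 alias and ge/le semantics; the exact set of required fields depends on WithBriefing:

    from fabricatio.models.extra.problem import Problem

    # Hypothetical values; the "cause" key feeds the aliased `description` field.
    problem = Problem(
        name="missing-timeout",
        cause="The HTTP client sets no timeout, so a stalled upstream hangs the worker.",
        severity_level=7,                      # validated against Field(ge=0, le=10)
        location="fabricatio/actions/rag.py",
    )
    print(problem.description)                 # populated via the "cause" alias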
fabricatio/models/extra/rag.py
ADDED
@@ -0,0 +1,98 @@
+"""A module containing the RAG (Retrieval-Augmented Generation) models."""
+
+from abc import ABC
+from functools import partial
+from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Self, Sequence, Set
+
+from fabricatio.decorators import precheck_package
+from fabricatio.models.generic import Vectorizable
+from fabricatio.utils import ok
+from pydantic import JsonValue
+
+if TYPE_CHECKING:
+    from importlib.util import find_spec
+
+    from pydantic.fields import FieldInfo
+
+    if find_spec("pymilvus"):
+        from pymilvus import CollectionSchema
+
+
+class MilvusDataBase(Vectorizable, ABC):
+    """A base class for Milvus data."""
+
+    primary_field_name: ClassVar[str] = "id"
+    """The name of the primary field in Milvus."""
+    vector_field_name: ClassVar[str] = "vector"
+    """The name of the vector field in Milvus."""
+
+    index_type: ClassVar[str] = "FLAT"
+    """The type of index to be used in Milvus."""
+    metric_type: ClassVar[str] = "COSINE"
+    """The type of metric to be used in Milvus."""
+
+    def prepare_insertion(self, vector: List[float]) -> Dict[str, Any]:
+        """Prepares the data for insertion into Milvus.
+
+        Returns:
+            dict: A dictionary containing the data to be inserted into Milvus.
+        """
+        return {**self.model_dump(exclude_none=True, by_alias=True), self.vector_field_name: vector}
+
+    @classmethod
+    @precheck_package(
+        "pymilvus", "pymilvus is not installed. Have you installed `fabricatio[rag]` instead of `fabricatio`?"
+    )
+    def as_milvus_schema(cls, dimension: int = 1024) -> "CollectionSchema":
+        """Generates the schema for Milvus collection."""
+        from pymilvus import CollectionSchema, DataType, FieldSchema
+
+        fields = [
+            FieldSchema(cls.primary_field_name, dtype=DataType.INT64, is_primary=True, auto_id=True),
+            FieldSchema(cls.vector_field_name, dtype=DataType.FLOAT_VECTOR, dim=dimension),
+        ]
+
+        for k, v in cls.model_fields.items():
+            k: str
+            v: FieldInfo
+            schema = partial(FieldSchema, k, description=v.description or "")
+            anno = ok(v.annotation)
+
+            if anno == int:
+                fields.append(schema(dtype=DataType.INT64))
+            elif anno == str:
+                fields.append(schema(dtype=DataType.VARCHAR, max_length=65535))
+            elif anno == float:
+                fields.append(schema(dtype=DataType.DOUBLE))
+            elif anno == list[str] or anno == List[str] or anno == set[str] or anno == Set[str]:
+                fields.append(
+                    schema(dtype=DataType.ARRAY, element_type=DataType.VARCHAR, max_length=65535, max_capacity=4096)
+                )
+            elif anno == list[int] or anno == List[int] or anno == set[int] or anno == Set[int]:
+                fields.append(schema(dtype=DataType.ARRAY, element_type=DataType.INT64, max_capacity=4096))
+            elif anno == list[float] or anno == List[float] or anno == set[float] or anno == Set[float]:
+                fields.append(schema(dtype=DataType.ARRAY, element_type=DataType.DOUBLE, max_capacity=4096))
+            elif anno == JsonValue:
+                fields.append(schema(dtype=DataType.JSON))
+
+            else:
+                raise NotImplementedError(f"{k}:{anno} is not supported")
+
+        return CollectionSchema(fields)
+
+    @classmethod
+    def from_sequence(cls, data: Sequence[Dict[str, Any]]) -> List[Self]:
+        """Constructs a list of instances from a sequence of dictionaries."""
+        return [cls(**d) for d in data]
+
+
+class MilvusClassicModel(MilvusDataBase):
+    """A class representing a classic model stored in Milvus."""
+
+    text: str
+    """The text to be stored in Milvus."""
+    subject: str = ""
+    """The subject of the text."""
+
+    def _prepare_vectorization_inner(self) -> str:
+        return self.text
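The new MilvusDataBase turns a subclass's pydantic fields into a Milvus CollectionSchema and into dump-ready rows. A sketch of the intended usage, assuming pymilvus is installed via fabricatio[rag]; the Chunk class and its fields are hypothetical:

    from typing import List

    from fabricatio.models.extra.rag import MilvusDataBase


    class Chunk(MilvusDataBase):
        """Hypothetical record type stored in Milvus."""

        text: str             # mapped to VARCHAR(65535)
        page: int             # mapped to INT64
        keywords: List[str]   # mapped to an ARRAY of VARCHAR

        def _prepare_vectorization_inner(self) -> str:
            return self.text


    schema = Chunk.as_milvus_schema(dimension=1024)  # id + vector plus the three fields above
    row = Chunk(text="...", page=3, keywords=["rag"]).prepare_insertion(vector=[0.0] * 1024)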
fabricatio/models/extra/rule.py
CHANGED
@@ -40,12 +40,11 @@ class RuleSet(SketchedAble, PersistentAble, WithBriefing, Language):
     framework for the topic or domain covered by the rule set."""

     @classmethod
-    def gather(cls, *rulesets: Unpack[Tuple["RuleSet"
+    def gather(cls, *rulesets: Unpack[Tuple["RuleSet", ...]]) -> Self:
         """Gathers multiple rule sets into a single rule set."""
         if not rulesets:
             raise ValueError("No rulesets provided")
         return cls(
-            language=rulesets[0].language,
             name=";".join(ruleset.name for ruleset in rulesets),
             description=";".join(ruleset.description for ruleset in rulesets),
             rules=list(flatten(r.rules for r in rulesets)),
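gather now accepts any number of rule sets as a variadic tuple and no longer copies language into the merged set, since language became a derived property on the Language mixin (see the generic.py diff below). A short usage sketch; the two rule sets are assumed to already exist:

    # style_rules and citation_rules are hypothetical RuleSet instances.
    combined = RuleSet.gather(style_rules, citation_rules)
    print(combined.name)  # names joined with ";", e.g. "style;citation"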
fabricatio/models/generic.py
CHANGED
@@ -3,15 +3,14 @@
 from abc import ABC, abstractmethod
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, List, Optional, Self, Type, Union, final, overload
+from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Self, Type, Union, final, overload

 import orjson
-import rtoml
 from fabricatio.config import configs
 from fabricatio.fs.readers import MAGIKA, safe_text_read
 from fabricatio.journal import logger
 from fabricatio.parser import JsonCapture
-from fabricatio.rust import blake3_hash
+from fabricatio.rust import blake3_hash, detect_language
 from fabricatio.rust_instances import TEMPLATE_MANAGER
 from fabricatio.utils import ok
 from litellm.utils import token_counter
@@ -53,7 +52,7 @@ class Display(Base):
         Returns:
             str: JSON string with 1-level indentation for readability
         """
-        return self.model_dump_json(indent=1)
+        return self.model_dump_json(indent=1, by_alias=True)

     def compact(self) -> str:
         """Generate compact JSON representation.
@@ -61,7 +60,7 @@ class Display(Base):
         Returns:
             str: Minified JSON string without whitespace
         """
-        return self.model_dump_json()
+        return self.model_dump_json(by_alias=True)

     @staticmethod
     def seq_display(seq: Iterable["Display"], compact: bool = False) -> str:
@@ -118,6 +117,15 @@ class WordCount(Base):
     """Expected word count of this research component."""


+class FromMapping(Base):
+    """Class that provides a method to generate a list of objects from a mapping."""
+
+    @classmethod
+    @abstractmethod
+    def from_mapping(cls, mapping: Mapping[str, Any], **kwargs: Any) -> List[Self]:
+        """Generate a list of objects from a mapping."""
+
+
 class AsPrompt(Base):
     """Class that provides a method to generate a prompt from the model.

@@ -225,7 +233,7 @@ class PersistentAble(Base):
         - Hash generated from JSON content ensures uniqueness
         """
         p = Path(path)
-        out = self.model_dump_json(indent=1)
+        out = self.model_dump_json(indent=1, by_alias=True)

         # Generate a timestamp in the format YYYYMMDD_HHMMSS
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -298,8 +306,17 @@ class PersistentAble(Base):
 class Language(Base):
     """Class that provides a language attribute."""

-
-
+    @property
+    def language(self) -> str:
+        """Get the language of the object."""
+        if isinstance(self, Described):
+            return detect_language(self.description)
+        if isinstance(self, Titled):
+            return detect_language(self.title)
+        if isinstance(self, Named):
+            return detect_language(self.name)
+
+        return detect_language(self.model_dump_json(by_alias=True))


 class ModelHash(Base):
@@ -543,7 +560,7 @@ class FinalizedDumpAble(Base):
         Returns:
             str: The finalized dump of the object.
         """
-        return self.model_dump_json()
+        return self.model_dump_json(indent=1, by_alias=True)

     def finalized_dump_to(self, path: str | Path) -> Self:
         """Finalize the dump of the object to a file.
@@ -655,8 +672,9 @@ class Vectorizable(Base):
     This class includes methods to prepare the model for vectorization, ensuring it fits within a specified token length.
     """

+    @abstractmethod
     def _prepare_vectorization_inner(self) -> str:
-
+        """Prepare the model for vectorization."""

     @final
     def prepare_vectorization(self, max_length: Optional[int] = None) -> str:
@@ -674,8 +692,7 @@ class Vectorizable(Base):
         max_length = max_length or configs.embedding.max_sequence_length
         chunk = self._prepare_vectorization_inner()
         if max_length and (length := token_counter(text=chunk)) > max_length:
-
-
+            raise ValueError(f"Chunk exceeds maximum sequence length {max_length}, got {length}, see \n{chunk}")

         return chunk

@@ -726,6 +743,12 @@ class ScopedConfig(Base):
     llm_rpm: Optional[PositiveInt] = None
     """The requests per minute of the LLM model."""

+    llm_presence_penalty: Optional[PositiveFloat] = None
+    """The presence penalty of the LLM model."""
+
+    llm_frequency_penalty: Optional[PositiveFloat] = None
+    """The frequency penalty of the LLM model."""
+
     embedding_api_endpoint: Optional[HttpUrl] = None
     """The OpenAI API endpoint."""
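The new Language.language property replaces the old stored attribute: it picks the first available text source in the order description, then title, then name, falls back to the aliased JSON dump, and delegates detection to the Rust detect_language helper. A sketch in which Memo is a hypothetical model mixing in Language and Described, and the returned language code is illustrative only:

    from fabricatio.models.generic import Described, Language


    class Memo(Language, Described):
        """Hypothetical model; only `description` is inspected for detection."""


    memo = Memo(description="Una breve nota sobre la generación aumentada por recuperación.")
    print(memo.language)  # detected from `description`, e.g. "es"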
fabricatio/models/kwargs_types.py
CHANGED
@@ -1,47 +1,16 @@
 """This module contains the types for the keyword arguments of the methods in the models module."""

-from
-from typing import Any, Dict, List, Optional, Required, TypedDict
+from typing import Any, Dict, List, NotRequired, Optional, Required, TypedDict

 from litellm.caching.caching import CacheMode
 from litellm.types.caching import CachingSupportedCallTypes

-if find_spec("pymilvus"):
-    from pymilvus import CollectionSchema
-    from pymilvus.milvus_client import IndexParams

-
-
+class ChunkKwargs(TypedDict):
+    """Configuration parameters for chunking operations."""

-
-
-
-    dimension: int | None
-    primary_field_name: str
-    id_type: str
-    vector_field_name: str
-    metric_type: str
-    timeout: float | None
-    schema: CollectionSchema | None
-    index_params: IndexParams | None
-
-
-class FetchKwargs(TypedDict, total=False):
-    """Arguments for fetching data from vector collections.
-
-    Controls how data is retrieved from vector databases, including filtering
-    and result limiting parameters.
-    """
-
-    collection_name: str | None
-    similarity_threshold: float
-    result_per_query: int
-
-
-class RetrievalKwargs(FetchKwargs, total=False):
-    """Arguments for retrieval operations."""
-
-    final_limit: int
+    max_chunk_size: int
+    max_overlapping_rate: NotRequired[float]


 class EmbeddingKwargs(TypedDict, total=False):
@@ -76,6 +45,8 @@ class LLMKwargs(TypedDict, total=False):
     no_store: bool  # If store the response of this call to cache
     cache_ttl: int  # how long the stored cache is alive, in seconds
     s_maxage: int  # max accepted age of cached response, in seconds
+    presence_penalty: float
+    frequency_penalty: float


 class GenerateKwargs(LLMKwargs, total=False):
@@ -139,6 +110,7 @@ class ReviewKwargs[T](ReviewInnerKwargs[T], total=False):

 class ReferencedKwargs[T](ValidateKwargs[T], total=False):
     """Arguments for content review operations."""
+
     reference: str

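The Milvus-specific TypedDicts were removed from this module, leaving a slimmer ChunkKwargs in which only max_chunk_size is required. A minimal sketch of the two accepted shapes:

    from fabricatio.models.kwargs_types import ChunkKwargs

    minimal: ChunkKwargs = {"max_chunk_size": 512}   # max_overlapping_rate is NotRequired
    full: ChunkKwargs = {"max_chunk_size": 512, "max_overlapping_rate": 0.2}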
fabricatio/models/task.py
CHANGED
@@ -7,11 +7,11 @@ from asyncio import Queue
 from typing import Any, List, Optional, Self

 from fabricatio.config import configs
+from fabricatio.constants import TaskStatus
 from fabricatio.core import env
 from fabricatio.journal import logger
 from fabricatio.models.events import Event, EventLike
 from fabricatio.models.generic import ProposedAble, WithBriefing, WithDependency
-from fabricatio.models.utils import TaskStatus
 from fabricatio.rust_instances import TEMPLATE_MANAGER
 from pydantic import Field, PrivateAttr

@@ -112,12 +112,12 @@ class Task[T](WithBriefing, ProposedAble, WithDependency):
         """Return a formatted status label for the task.

         Args:
-            status (TaskStatus): The status of the task.
+            status (fabricatio.constants.TaskStatus): The status of the task.

         Returns:
             str: The formatted status label.
         """
-        return self._namespace.derive(self.name).push(status
+        return self._namespace.derive(self.name).push(status).collapse()

     @property
     def pending_label(self) -> str:
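TaskStatus now lives in fabricatio.constants (the old fabricatio.models.utils module was removed), and a status label is built by pushing the status onto the task's namespace event and collapsing it back to a string. A rough sketch; the exact label text depends on the configured event delimiter, and the Task constructor arguments shown are assumptions:

    from fabricatio.constants import TaskStatus  # moved here from fabricatio.models.utils
    from fabricatio.models.task import Task

    task = Task(name="summarize")  # hypothetical; remaining fields left at their defaults
    print(task.pending_label)      # namespace, task name and pending status collapsed into one string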
fabricatio/models/usages.py
CHANGED
@@ -2,7 +2,7 @@

 import traceback
 from asyncio import gather
-from typing import Callable, Dict, Iterable, List, Optional, Self, Sequence, Set, Union, Unpack, overload
+from typing import Callable, Dict, Iterable, List, Literal, Optional, Self, Sequence, Set, Union, Unpack, overload

 import asyncstdlib
 import litellm
@@ -13,7 +13,6 @@ from fabricatio.models.generic import ScopedConfig, WithBriefing
 from fabricatio.models.kwargs_types import ChooseKwargs, EmbeddingKwargs, GenerateKwargs, LLMKwargs, ValidateKwargs
 from fabricatio.models.task import Task
 from fabricatio.models.tool import Tool, ToolBox
-from fabricatio.models.utils import Messages
 from fabricatio.parser import GenericCapture, JsonCapture
 from fabricatio.rust_instances import TEMPLATE_MANAGER
 from fabricatio.utils import ok
@@ -28,7 +27,7 @@ from litellm.types.utils import (
 )
 from litellm.utils import CustomStreamWrapper, token_counter  # pyright: ignore [reportPrivateImportUsage]
 from more_itertools import duplicates_everseen
-from pydantic import Field, NonNegativeInt, PositiveInt
+from pydantic import BaseModel, ConfigDict, Field, NonNegativeInt, PositiveInt

 if configs.cache.enabled and configs.cache.type:
     litellm.enable_cache(type=configs.cache.type, **configs.cache.params)
@@ -64,7 +63,7 @@ class LLMUsage(ScopedConfig):
         self._added_deployment = ROUTER.upsert_deployment(deployment)
         return ROUTER

-    # noinspection PyTypeChecker,PydanticTypeChecker
+    # noinspection PyTypeChecker,PydanticTypeChecker,t
     async def aquery(
         self,
         messages: List[Dict[str, str]],
@@ -123,6 +122,12 @@ class LLMUsage(ScopedConfig):
                "cache-ttl": kwargs.get("cache_ttl"),
                "s-maxage": kwargs.get("s_maxage"),
            },
+            presence_penalty=kwargs.get("presence_penalty")
+            or self.llm_presence_penalty
+            or configs.llm.presence_penalty,
+            frequency_penalty=kwargs.get("frequency_penalty")
+            or self.llm_frequency_penalty
+            or configs.llm.frequency_penalty,
        )

    async def ainvoke(
@@ -303,7 +308,7 @@ class LLMUsage(ScopedConfig):
            and logger.debug("Co-extraction is enabled.") is None
            and (
                validated := validator(
-                    response:=await self.aask(
+                    response := await self.aask(
                        question=(
                            TEMPLATE_MANAGER.render_template(
                                configs.templates.co_validation_template,
@@ -495,7 +500,7 @@ class LLMUsage(ScopedConfig):
        affirm_case: str = "",
        deny_case: str = "",
        **kwargs: Unpack[ValidateKwargs[bool]],
-    ) -> bool:
+    ) -> Optional[bool]:
        """Asynchronously judges a prompt using AI validation.

        Args:
@@ -732,3 +737,72 @@ class ToolBoxUsage(LLMUsage):
        for other in (x for x in others if isinstance(x, ToolBoxUsage)):
            other.toolboxes.update(self.toolboxes)
        return self
+
+
+class Message(BaseModel):
+    """A class representing a message."""
+
+    model_config = ConfigDict(use_attribute_docstrings=True)
+    role: Literal["user", "system", "assistant"]
+    """The role of the message sender."""
+    content: str
+    """The content of the message."""
+
+
+class Messages(list):
+    """A list of messages."""
+
+    def add_message(self, role: Literal["user", "system", "assistant"], content: str) -> Self:
+        """Adds a message to the list with the specified role and content.
+
+        Args:
+            role (Literal["user", "system", "assistant"]): The role of the message sender.
+            content (str): The content of the message.
+
+        Returns:
+            Self: The current instance of Messages to allow method chaining.
+        """
+        if content:
+            self.append(Message(role=role, content=content))
+        return self
+
+    def add_user_message(self, content: str) -> Self:
+        """Adds a user message to the list with the specified content.
+
+        Args:
+            content (str): The content of the user message.
+
+        Returns:
+            Self: The current instance of Messages to allow method chaining.
+        """
+        return self.add_message("user", content)
+
+    def add_system_message(self, content: str) -> Self:
+        """Adds a system message to the list with the specified content.
+
+        Args:
+            content (str): The content of the system message.
+
+        Returns:
+            Self: The current instance of Messages to allow method chaining.
+        """
+        return self.add_message("system", content)
+
+    def add_assistant_message(self, content: str) -> Self:
+        """Adds an assistant message to the list with the specified content.
+
+        Args:
+            content (str): The content of the assistant message.
+
+        Returns:
+            Self: The current instance of Messages to allow method chaining.
+        """
+        return self.add_message("assistant", content)
+
+    def as_list(self) -> List[Dict[str, str]]:
+        """Converts the messages to a list of dictionaries.
+
+        Returns:
+            list[dict]: A list of dictionaries representing the messages.
+        """
+        return [message.model_dump() for message in self]
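Message and Messages moved here from the removed fabricatio.models.utils, and the new penalty settings resolve in the order call kwargs, then the instance's scoped config, then the global configs.llm defaults. A short sketch of the chaining helper, using made-up message text:

    from fabricatio.models.usages import Messages

    payload = (
        Messages()
        .add_system_message("You are a terse code reviewer.")
        .add_user_message("Summarize the risk of this diff.")
        .as_list()
    )
    # [{"role": "system", "content": ...}, {"role": "user", "content": ...}],
    # the shape expected by aquery(messages=...)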
Binary file (contents not shown)