PyPI - fabricatio - Versions diffs - 0.3.13__cp312-cp312-win_amd64.whl → 0.3.14__cp312-cp312-win_amd64.whl - Mend

fabricatio 0.3.13__cp312-cp312-win_amd64.whl → 0.3.14__cp312-cp312-win_amd64.whl

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (48)

fabricatio/__init__.py +6 -13
fabricatio/actions/article.py +87 -50
fabricatio/actions/article_rag.py +59 -68
fabricatio/actions/output.py +58 -24
fabricatio/actions/rag.py +2 -3
fabricatio/capabilities/advanced_judge.py +4 -7
fabricatio/capabilities/advanced_rag.py +2 -1
fabricatio/capabilities/censor.py +5 -4
fabricatio/capabilities/check.py +27 -27
fabricatio/capabilities/correct.py +22 -22
fabricatio/capabilities/extract.py +33 -33
fabricatio/capabilities/persist.py +103 -0
fabricatio/capabilities/propose.py +2 -2
fabricatio/capabilities/rag.py +11 -10
fabricatio/capabilities/rating.py +66 -70
fabricatio/capabilities/review.py +12 -11
fabricatio/capabilities/task.py +19 -18
fabricatio/decorators.py +11 -9
fabricatio/{core.py → emitter.py} +17 -19
fabricatio/journal.py +2 -4
fabricatio/models/action.py +15 -32
fabricatio/models/extra/aricle_rag.py +13 -8
fabricatio/models/extra/article_base.py +57 -25
fabricatio/models/extra/article_essence.py +2 -1
fabricatio/models/extra/article_main.py +24 -22
fabricatio/models/extra/article_outline.py +2 -1
fabricatio/models/extra/article_proposal.py +1 -1
fabricatio/models/extra/rag.py +2 -2
fabricatio/models/extra/rule.py +2 -1
fabricatio/models/generic.py +55 -137
fabricatio/models/kwargs_types.py +1 -54
fabricatio/models/role.py +49 -28
fabricatio/models/task.py +3 -4
fabricatio/models/tool.py +6 -7
fabricatio/models/usages.py +146 -149
fabricatio/parser.py +59 -99
fabricatio/rust.cp312-win_amd64.pyd +0 -0
fabricatio/rust.pyi +58 -81
fabricatio/utils.py +63 -162
fabricatio-0.3.14.data/scripts/tdown.exe +0 -0
fabricatio-0.3.14.data/scripts/ttm.exe +0 -0
{fabricatio-0.3.13.dist-info → fabricatio-0.3.14.dist-info}/METADATA +10 -13
fabricatio-0.3.14.dist-info/RECORD +64 -0
{fabricatio-0.3.13.dist-info → fabricatio-0.3.14.dist-info}/WHEEL +1 -1
fabricatio-0.3.13.data/scripts/tdown.exe +0 -0
fabricatio-0.3.13.data/scripts/ttm.exe +0 -0
fabricatio-0.3.13.dist-info/RECORD +0 -63
{fabricatio-0.3.13.dist-info → fabricatio-0.3.14.dist-info}/licenses/LICENSE +0 -0

fabricatio/parser.py CHANGED Viewed

@@ -1,152 +1,112 @@
-"""A module to parse text using regular expressions."""
+"""A module for capturing patterns in text using regular expressions."""
 import re
+from dataclasses import dataclass, field
 from functools import lru_cache
-from re import Pattern, compile
-from typing import Any, Callable, Iterable, List, Optional, Self, Tuple, Type
+from typing import Any, Callable, Iterable, List, Optional, Self, Tuple, Type, Union
 import ujson
-from fabricatio.rust import CONFIG
 from json_repair import repair_json
-from pydantic import BaseModel, ConfigDict, Field, PositiveInt, PrivateAttr, ValidationError
 from fabricatio.journal import logger
+from fabricatio.rust import CONFIG
-class Capture(BaseModel):
+@dataclass(frozen=True)
+class Capture:
     """A class to capture patterns in text using regular expressions.
     Attributes:
-        pattern (str): The regular expression pattern to search for.
-        _compiled (Pattern): The compiled regular expression pattern.
+        target_groups (Tuple[int, ...]): The target groups to extract from the match.
+        pattern (str): The regex pattern to search for.
+        flags (int): Flags to apply when compiling the regex.
+        capture_type (Optional[str]): Optional hint for post-processing (e.g., 'json').
     """
-    model_config = ConfigDict(use_attribute_docstrings=True)
-    target_groups: Tuple[int, ...] = Field(default_factory=tuple)
-    """The target groups to capture from the pattern."""
-    pattern: str = Field(frozen=True)
+    pattern: str = field()
     """The regular expression pattern to search for."""
-    flags: PositiveInt = Field(default=re.DOTALL | re.MULTILINE | re.IGNORECASE, frozen=True)
-    """The flags to use when compiling the regular expression pattern."""
+    flags: int = re.DOTALL | re.MULTILINE | re.IGNORECASE
+    """Flags to control regex behavior (DOTALL, MULTILINE, IGNORECASE by default)."""
     capture_type: Optional[str] = None
-    """The type of capture to perform, e.g., 'json', which is used to dispatch the fixer accordingly."""
-    _compiled: Pattern = PrivateAttr()
-    def model_post_init(self, __context: Any) -> None:
-        """Initialize the compiled pattern."""
-        self._compiled = compile(self.pattern, self.flags)
-    def fix[T](self, text: str | Iterable[str] | T) -> str | List[str] | T:
-        """Fix the text using the pattern.
+    """Optional type identifier for post-processing (e.g., 'json' for JSON repair)."""
+    target_groups: Tuple[int, ...] = field(default_factory=tuple)
+    """Tuple of group indices to extract from the match (1-based indexing)."""
-        Args:
-            text (str | List[str]): The text to fix.
-        Returns:
-            str | List[str]: The fixed text with the same type as input.
-        """
+    def fix(self, text: Union[str, Iterable[str], Any]) -> Union[str, List[str], Any]:
+        """Fix the text based on capture_type (e.g., JSON repair)."""
         match self.capture_type:
             case "json" if CONFIG.general.use_json_repair:
-                logger.debug("Applying json repair to text.")
+                logger.debug("Applying JSON repair to text.")
                 if isinstance(text, str):
-                    return repair_json(text, ensure_ascii=False)  # pyright: ignore [reportReturnType]
-                return [repair_json(item, ensure_ascii=False) for item in
-                        text]  # pyright: ignore [reportReturnType, reportGeneralTypeIssues]
+                    return repair_json(text, ensure_ascii=False)
+                return [repair_json(item, ensure_ascii=False) for item in text]
             case _:
-                return text  # pyright: ignore [reportReturnType]
-    def capture(self, text: str) -> Tuple[str, ...] | str | None:
-        """Capture the first occurrence of the pattern in the given text.
-        Args:
-            text (str): The text to search the pattern in.
-        Returns:
-            str | None: The captured text if the pattern is found, otherwise None.
-        """
-        if (match := self._compiled.match(text) or self._compiled.search(text)) is None:
-            logger.debug(f"Capture Failed {type(text)}: \n{text}")
+                return text
+    def capture(self, text: str) -> Optional[Union[str, Tuple[str, ...]]]:
+        """Capture the first match of the pattern in the text."""
+        compiled = re.compile(self.pattern, self.flags)
+        match = compiled.match(text) or compiled.search(text)
+        if match is None:
+            logger.debug(f"Capture Failed: {text}")
             return None
         groups = self.fix(match.groups())
         if self.target_groups:
             cap = tuple(groups[g - 1] for g in self.target_groups)
-            logger.debug(f"Captured text: {'\n\n'.join(cap)}")
+            logger.debug(f"Captured texts: {'\n==\n'.join(cap)}")
             return cap
         cap = groups[0]
         logger.debug(f"Captured text: \n{cap}")
         return cap
-    def convert_with[T](self, text: str, convertor: Callable[[Tuple[str, ...]], T] | Callable[[str], T]) -> T | None:
-        """Convert the given text using the pattern.
-        Args:
-            text (str): The text to search the pattern in.
-            convertor (Callable[[Tuple[str, ...]], T] | Callable[[str], T]): The function to convert the captured text.
-        Returns:
-            str | None: The converted text if the pattern is found, otherwise None.
-        """
+    def convert_with(
+        self,
+        text: str,
+        convertor: Callable[[Union[str, Tuple[str, ...]]], Any],
+    ) -> Optional[Any]:
+        """Convert captured text using a provided function."""
         if (cap := self.capture(text)) is None:
             return None
         try:
-            return convertor(cap)  # pyright: ignore [reportArgumentType]
-        except (ValueError, SyntaxError, ValidationError) as e:
-            logger.error(f"Failed to convert text using {convertor.__name__} to convert.\nerror: {e}\n {cap}")
+            return convertor(cap)
+        except Exception as e:  # noqa: BLE001
+            logger.error(f"Failed to convert text using {convertor.__name__}: {e}\n{cap}")
             return None
-    def validate_with[K, T, E](
-            self,
-            text: str,
-            target_type: Type[T],
-            elements_type: Optional[Type[E]] = None,
-            length: Optional[int] = None,
-            deserializer: Callable[[Tuple[str, ...]], K] | Callable[[str], K] = ujson.loads,
-    ) -> T | None:
-        """Validate the given text using the pattern.
-        Args:
-            text (str): The text to search the pattern in.
-            target_type (Type[T]): The expected type of the output, dict or list.
-            elements_type (Optional[Type[E]]): The expected type of the elements in the output dict keys or list elements.
-            length (Optional[int]): The expected length of the output, bool(length)==False means no length validation.
-            deserializer (Callable[[Tuple[str, ...]], K] | Callable[[str], K]): The function to deserialize the captured text.
-        Returns:
-            T | None: The validated text if the pattern is found and the output is of the expected type, otherwise None.
-        """
-        judges = [lambda output_obj: isinstance(output_obj, target_type)]
+    def validate_with[T, K, E](
+        self,
+        text: str,
+        target_type: Type[T],
+        elements_type: Optional[Type[E]] = None,
+        length: Optional[int] = None,
+        deserializer: Callable[[Union[str, Tuple[str, ...]]], K] = lambda x: ujson.loads(x) if isinstance(x, str) else ujson.loads(x[0]),
+    ) -> Optional[T]:
+        """Deserialize and validate the captured text against expected types."""
+        judges = [lambda obj: isinstance(obj, target_type)]
         if elements_type:
-            judges.append(lambda output_obj: all(isinstance(e, elements_type) for e in output_obj))
+            judges.append(lambda obj: all(isinstance(e, elements_type) for e in obj))
         if length:
-            judges.append(lambda output_obj: len(output_obj) == length)
+            judges.append(lambda obj: len(obj) == length)
         if (out := self.convert_with(text, deserializer)) and all(j(out) for j in judges):
-            return out  # pyright: ignore [reportReturnType]
+            return out  # type: ignore
         return None
     @classmethod
     @lru_cache(32)
     def capture_code_block(cls, language: str) -> Self:
-        """Capture the first occurrence of a code block in the given text.
-        Args:
-            language (str): The text containing the code block.
-        Returns:
-            Self: The instance of the class with the captured code block.
-        """
+        """Capture a code block of the given language."""
         return cls(pattern=f"```{language}(.*?)```", capture_type=language)
     @classmethod
     @lru_cache(32)
     def capture_generic_block(cls, language: str) -> Self:
-        """Capture the first occurrence of a generic code block in the given text.
-        Returns:
-            Self: The instance of the class with the captured code block.
-        """
-        return cls(pattern=f"--- Start of {language} ---(.*?)--- end of {language} ---", capture_type=language)
+        """Capture a generic block of the given language."""
+        return cls(
+            pattern=f"--- Start of {language} ---(.*?)--- End of {language} ---",
+            capture_type=language,
+        )
 JsonCapture = Capture.capture_code_block("json")

fabricatio/rust.cp312-win_amd64.pyd CHANGED Viewed

Binary file

fabricatio/rust.pyi CHANGED Viewed

@@ -10,12 +10,13 @@ Key Features:
 - Cryptographic utilities: BLAKE3 hashing.
 - Text utilities: Word boundary splitting and word counting.
 """
 from enum import StrEnum
-from typing import Any, Dict, List, Optional, Self, Tuple, overload, Union
+from pathlib import Path
+from typing import Any, Dict, List, Literal, Optional, Self, Tuple, Union, overload
 from pydantic import JsonValue
 class TemplateManager:
     """Template rendering engine using Handlebars templates.
@@ -47,10 +48,8 @@ class TemplateManager:
     @overload
     def render_template(self, name: str, data: Dict[str, Any]) -> str: ...
     @overload
     def render_template(self, name: str, data: List[Dict[str, Any]]) -> List[str]: ...
     def render_template(self, name: str, data: Dict[str, Any] | List[Dict[str, Any]]) -> str | List[str]:
         """Render a template with context data.
@@ -67,10 +66,8 @@ class TemplateManager:
     @overload
     def render_template_raw(self, template: str, data: Dict[str, Any]) -> str: ...
     @overload
     def render_template_raw(self, template: str, data: List[Dict[str, Any]]) -> List[str]: ...
     def render_template_raw(self, template: str, data: Dict[str, Any] | List[Dict[str, Any]]) -> str | List[str]:
         """Render a template with context data.
@@ -82,7 +79,6 @@ class TemplateManager:
             Rendered template content as string or list of strings
         """
 class BibManager:
     """BibTeX bibliography manager for parsing and querying citation data."""
@@ -191,7 +187,6 @@ class BibManager:
             Field value if found, None otherwise
         """
 def blake3_hash(content: bytes) -> str:
     """Calculate the BLAKE3 cryptographic hash of data.
@@ -202,11 +197,9 @@ def blake3_hash(content: bytes) -> str:
         Hex-encoded BLAKE3 hash string
     """
 def detect_language(string: str) -> str:
     """Detect the language of a given string."""
 def split_word_bounds(string: str) -> List[str]:
     """Split the string into words based on word boundaries.
@@ -217,7 +210,6 @@ def split_word_bounds(string: str) -> List[str]:
         A list of words extracted from the string.
     """
 def split_sentence_bounds(string: str) -> List[str]:
     """Split the string into sentences based on sentence boundaries.
@@ -228,7 +220,6 @@ def split_sentence_bounds(string: str) -> List[str]:
         A list of sentences extracted from the string.
     """
 def split_into_chunks(string: str, max_chunk_size: int, max_overlapping_rate: float = 0.3) -> List[str]:
     """Split the string into chunks of a specified size.
@@ -241,7 +232,6 @@ def split_into_chunks(string: str, max_chunk_size: int, max_overlapping_rate: fl
         A list of chunks extracted from the string.
     """
 def word_count(string: str) -> int:
     """Count the number of words in the string.
@@ -252,67 +242,51 @@ def word_count(string: str) -> int:
         The number of words in the string.
     """
 def is_chinese(string: str) -> bool:
     """Check if the given string is in Chinese."""
 def is_english(string: str) -> bool:
     """Check if the given string is in English."""
 def is_japanese(string: str) -> bool:
     """Check if the given string is in Japanese."""
 def is_korean(string: str) -> bool:
     """Check if the given string is in Korean."""
 def is_arabic(string: str) -> bool:
     """Check if the given string is in Arabic."""
 def is_russian(string: str) -> bool:
     """Check if the given string is in Russian."""
 def is_german(string: str) -> bool:
     """Check if the given string is in German."""
 def is_french(string: str) -> bool:
     """Check if the given string is in French."""
 def is_hindi(string: str) -> bool:
     """Check if the given string is in Hindi."""
 def is_italian(string: str) -> bool:
     """Check if the given string is in Italian."""
 def is_dutch(string: str) -> bool:
     """Check if the given string is in Dutch."""
 def is_portuguese(string: str) -> bool:
     """Check if the given string is in Portuguese."""
 def is_swedish(string: str) -> bool:
     """Check if the given string is in Swedish."""
 def is_turkish(string: str) -> bool:
     """Check if the given string is in Turkish."""
 def is_vietnamese(string: str) -> bool:
     """Check if the given string is in Vietnamese."""
 def tex_to_typst(string: str) -> str:
     """Convert TeX to Typst.
@@ -323,29 +297,18 @@ def tex_to_typst(string: str) -> str:
         The converted Typst string.
     """
+def convert_all_tex_math(string: str) -> str:
+    r"""Unified function to convert all supported TeX math expressions in a string to Typst format.
-def convert_all_inline_tex(string: str) -> str:
-    """Convert all inline TeX code in the string.
+    Handles $...$, $$...$$, \\(...\\), and \\[...\\]
     Args:
-        string: The input string containing inline TeX code wrapped in $code$.
+        string: The input string containing TeX math expressions.
     Returns:
-        The converted string with inline TeX code replaced.
+        The string with TeX math expressions converted to Typst format.
     """
-def convert_all_block_tex(string: str) -> str:
-    """Convert all block TeX code in the string.
-    Args:
-        string: The input string containing block TeX code wrapped in $$code$$.
-    Returns:
-        The converted string with block TeX code replaced.
-    """
 def fix_misplaced_labels(string: str) -> str:
     """A func to fix labels in a string.
@@ -356,9 +319,8 @@ def fix_misplaced_labels(string: str) -> str:
         The fixed string with labels properly placed.
     """
 def comment(string: str) -> str:
-    """Add comment to the string.
+    r"""Add comment to the string.
     Args:
         string: The input string to which comments will be added.
@@ -367,7 +329,6 @@ def comment(string: str) -> str:
         The string with each line prefixed by '// '.
     """
 def uncomment(string: str) -> str:
     """Remove comment from the string.
@@ -378,6 +339,15 @@ def uncomment(string: str) -> str:
         The string with comments (lines starting with '// ' or '//') removed.
     """
+def strip_comment(string: str) -> str:
+    """Remove leading and trailing comment lines from a multi-line string.
+    Args:
+        string: Input string that may have comment lines at start and/or end
+    Returns:
+        str: A new string with leading and trailing comment lines removed
+    """
 def split_out_metadata(string: str) -> Tuple[Optional[JsonValue], str]:
     """Split out metadata from a string.
@@ -389,7 +359,6 @@ def split_out_metadata(string: str) -> Tuple[Optional[JsonValue], str]:
         A tuple containing the metadata as a Python object (if parseable) and the remaining string.
     """
 def to_metadata(data: JsonValue) -> str:
     """Convert a Python object to a YAML string.
@@ -400,16 +369,7 @@ def to_metadata(data: JsonValue) -> str:
         The YAML string representation of the input data.
     """
-def convert_to_inline_formula(string: str) -> str:
-    r"""Convert `$...$` to inline formula `\(...\)` and trim spaces."""
-def convert_to_block_formula(string: str) -> str:
-    r"""Convert `$$...$$` to block formula `\[...\]` and trim spaces."""
-def inplace_update(string: str, wrapper: str, new_body: str) -> Optional[str]:
+def replace_thesis_body(string: str, wrapper: str, new_body: str) -> Optional[str]:
     """Replace content between wrapper strings.
     Args:
@@ -422,7 +382,6 @@ def inplace_update(string: str, wrapper: str, new_body: str) -> Optional[str]:
     """
 def extract_body(string: str, wrapper: str) -> Optional[str]:
     """Extract the content between two occurrences of a wrapper string.
@@ -434,7 +393,6 @@ def extract_body(string: str, wrapper: str) -> Optional[str]:
         The content between the first two occurrences of the wrapper string if found, otherwise None.
     """
 class LLMConfig:
     """LLM configuration structure.
@@ -486,7 +444,6 @@ class LLMConfig:
     frequency_penalty: Optional[float]
     """Penalizes new tokens based on their frequency in text so far (-2.0-2.0)."""
 class EmbeddingConfig:
     """Embedding configuration structure."""
@@ -511,7 +468,6 @@ class EmbeddingConfig:
     api_key: Optional[SecretStr]
     """The API key."""
 class RagConfig:
     """RAG (Retrieval Augmented Generation) configuration structure."""
@@ -527,18 +483,16 @@ class RagConfig:
     milvus_dimensions: Optional[int]
     """The dimensions for Milvus vectors."""
 class DebugConfig:
     """Debug configuration structure."""
     log_level: Optional[str]
     """The logging level to use."""
 class TemplateManagerConfig:
     """Template manager configuration structure."""
-    template_dir: List[str]
+    template_dir: List[Path]
     """The directories containing the templates."""
     active_loading: Optional[bool]
@@ -547,7 +501,6 @@ class TemplateManagerConfig:
     template_suffix: Optional[str]
     """The suffix of the templates."""
 class TemplateConfig:
     """Template configuration structure."""
@@ -632,7 +585,6 @@ class TemplateConfig:
     chap_summary_template: str
     """The name of the chap summary template which will be used to generate a chapter summary."""
 class RoutingConfig:
     """Routing configuration structure for controlling request dispatching behavior."""
@@ -648,7 +600,6 @@ class RoutingConfig:
     cooldown_time: Optional[int]
     """Time to cooldown a deployment after failure in seconds."""
 class GeneralConfig:
     """General configuration structure for application-wide settings."""
@@ -658,7 +609,6 @@ class GeneralConfig:
     use_json_repair: bool
     """Whether to automatically repair malformed JSON."""
 class ToolBoxConfig:
     """Configuration for toolbox functionality."""
@@ -668,7 +618,6 @@ class ToolBoxConfig:
     data_module_name: str
     """The name of the module containing the data."""
 class PymitterConfig:
     """Pymitter configuration structure for controlling event emission and listener behavior."""
@@ -681,7 +630,6 @@ class PymitterConfig:
     max_listeners: int
     """The maximum number of listeners per event. -1 means unlimited."""
 class Config:
     """Configuration structure containing all system components."""
@@ -715,27 +663,23 @@ class Config:
     pymitter: PymitterConfig
     """Pymitter configuration."""
 CONFIG: Config
 class SecretStr:
     """A string that should not be exposed."""
     def __init__(self, source: str) -> None: ...
-    def expose(self) -> str:
+    def get_secret_value(self) -> str:
         """Expose the secret string."""
 TEMPLATE_MANAGER: TemplateManager
 class Event:
     """Event class that represents a hierarchical event with segments.
     Events can be constructed from strings, lists of strings, or other Events.
     """
     segments: List[str]
     def __init__(self, segments: Optional[List[str]] = None) -> None:
@@ -841,12 +785,9 @@ class Event:
         """
     def __hash__(self) -> int: ...
     def __eq__(self, other: object) -> bool: ...
     def __ne__(self, other: object) -> bool: ...
 class TaskStatus(StrEnum, str):
     """Enumeration of possible task statuses."""
@@ -864,3 +805,39 @@ class TaskStatus(StrEnum, str):
     Cancelled: TaskStatus
     """Task has been cancelled."""
+class TEIClient:
+    """Client for TEI reranking service.
+    Handles communication with a TEI reranking service to reorder text snippets
+    based on their relevance to a query.
+    """
+    def __init__(self, base_url: str) -> None:
+        """Initialize the TEI client.
+        Args:
+            base_url: URL to the TEI reranking service
+        """
+    async def arerank(
+        self,
+        query: str,
+        texts: List[str],
+        truncate: bool = False,
+        truncation_direction: Literal["Left", "Right"] = "Left",
+    ) -> List[Tuple[int, float]]:
+        """Rerank texts based on relevance to query.
+        Args:
+            query: The query to match texts against
+            texts: List of text snippets to rerank
+            truncate: Whether to truncate texts to fit model context
+            truncation_direction: Direction to truncate from ("Left" or "Right")
+        Returns:
+            List of tuples containing (original_index, relevance_score)
+        Raises:
+            RuntimeError: If reranking fails or truncation_direction is invalid
+        """