PyPI - fabricatio - Versions diffs - 0.2.4.dev2__cp312-cp312-win_amd64.whl → 0.2.5__cp312-cp312-win_amd64.whl - Mend

fabricatio 0.2.4.dev2__cp312-cp312-win_amd64.whl → 0.2.5__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

fabricatio/__init__.py +14 -5
fabricatio/_rust.cp312-win_amd64.pyd +0 -0
fabricatio/_rust.pyi +65 -16
fabricatio/_rust_instances.py +2 -0
fabricatio/actions/article.py +46 -14
fabricatio/actions/output.py +21 -0
fabricatio/actions/rag.py +1 -1
fabricatio/capabilities/propose.py +14 -20
fabricatio/capabilities/rag.py +85 -26
fabricatio/capabilities/rating.py +59 -51
fabricatio/capabilities/review.py +241 -0
fabricatio/capabilities/task.py +7 -8
fabricatio/config.py +36 -4
fabricatio/fs/__init__.py +13 -1
fabricatio/fs/curd.py +27 -8
fabricatio/fs/readers.py +6 -3
fabricatio/journal.py +1 -1
fabricatio/models/action.py +6 -8
fabricatio/models/events.py +6 -4
fabricatio/models/extra.py +100 -25
fabricatio/models/generic.py +56 -4
fabricatio/models/kwargs_types.py +123 -35
fabricatio/models/role.py +3 -3
fabricatio/models/task.py +0 -14
fabricatio/models/tool.py +7 -6
fabricatio/models/usages.py +144 -101
fabricatio/parser.py +26 -5
fabricatio/toolboxes/__init__.py +1 -3
fabricatio/toolboxes/fs.py +17 -1
fabricatio/workflows/articles.py +10 -6
fabricatio/workflows/rag.py +11 -0
fabricatio-0.2.5.data/scripts/tdown.exe +0 -0
{fabricatio-0.2.4.dev2.dist-info → fabricatio-0.2.5.dist-info}/METADATA +2 -1
fabricatio-0.2.5.dist-info/RECORD +41 -0
fabricatio/toolboxes/task.py +0 -6
fabricatio-0.2.4.dev2.data/scripts/tdown.exe +0 -0
fabricatio-0.2.4.dev2.dist-info/RECORD +0 -39
{fabricatio-0.2.4.dev2.dist-info → fabricatio-0.2.5.dist-info}/WHEEL +0 -0
{fabricatio-0.2.4.dev2.dist-info → fabricatio-0.2.5.dist-info}/licenses/LICENSE +0 -0

fabricatio/__init__.py CHANGED

@@ -2,10 +2,12 @@
 from importlib.util import find_spec
+from fabricatio._rust import BibManager
 from fabricatio._rust_instances import template_manager
-from fabricatio.actions.article import ExtractArticleEssence
+from fabricatio.actions.article import ExtractArticleEssence, GenerateArticleProposal, GenerateOutline
+from fabricatio.actions.output import DumpFinalizedOutput
 from fabricatio.core import env
-from fabricatio.fs import magika
+from fabricatio.fs import magika, safe_json_read, safe_text_read
 from fabricatio.journal import logger
 from fabricatio.models.action import Action, WorkFlow
 from fabricatio.models.events import Event
@@ -15,15 +17,20 @@ from fabricatio.models.task import Task
 from fabricatio.models.tool import ToolBox
 from fabricatio.models.utils import Message, Messages
 from fabricatio.parser import Capture, CodeBlockCapture, JsonCapture, PythonCapture
-from fabricatio.toolboxes import arithmetic_toolbox, basic_toolboxes, fs_toolbox, task_toolbox
+from fabricatio.toolboxes import arithmetic_toolbox, basic_toolboxes, fs_toolbox
+from fabricatio.workflows.articles import WriteOutlineWorkFlow
 __all__ = [
     "Action",
     "ArticleEssence",
+    "BibManager",
     "Capture",
     "CodeBlockCapture",
+    "DumpFinalizedOutput",
     "Event",
     "ExtractArticleEssence",
+    "GenerateArticleProposal",
+    "GenerateOutline",
     "JsonCapture",
     "Message",
     "Messages",
@@ -32,13 +39,15 @@ __all__ = [
     "Task",
     "ToolBox",
     "WorkFlow",
+    "WriteOutlineWorkFlow",
     "arithmetic_toolbox",
     "basic_toolboxes",
     "env",
     "fs_toolbox",
     "logger",
     "magika",
-    "task_toolbox",
+    "safe_json_read",
+    "safe_text_read",
     "template_manager",
 ]
@@ -46,6 +55,6 @@ __all__ = [
 if find_spec("pymilvus"):
     from fabricatio.actions.rag import InjectToDB
     from fabricatio.capabilities.rag import RAG
-    from fabricatio.workflows.articles import StoreArticle
+    from fabricatio.workflows.rag import StoreArticle
     __all__ += ["RAG", "InjectToDB", "StoreArticle"]

fabricatio/_rust.cp312-win_amd64.pyd CHANGED

Binary file

fabricatio/_rust.pyi CHANGED

@@ -2,52 +2,101 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional
 class TemplateManager:
-    """TemplateManager class for managing handlebars templates."""
+    """Template rendering engine using Handlebars templates.
+    This manager handles template discovery, loading, and rendering
+    through a wrapper around the handlebars-rust engine.
+    See: https://crates.io/crates/handlebars
+    """
     def __init__(
         self, template_dirs: List[Path], suffix: Optional[str] = None, active_loading: Optional[bool] = None
     ) -> None:
         """Initialize the template manager.
         Args:
-            template_dirs (List[Path]): A list of paths to directories containing templates.
-            suffix (str, optional): The suffix of template files. None means 'hbs' suffix.
-            active_loading (bool, optional): Whether to enable active loading of templates.
+            template_dirs: List of directories containing template files
+            suffix: File extension for templates (defaults to 'hbs')
+            active_loading: Whether to enable dev mode for reloading templates on change
         """
     @property
     def template_count(self) -> int:
-        """Get the number of templates discovered."""
+        """Returns the number of currently loaded templates."""
     def get_template_source(self, name: str) -> Optional[str]:
-        """Get the source path of a template by name.
+        """Get the filesystem path for a template.
         Args:
-            name (str): The name of the template to retrieve.
+            name: Template name (without extension)
         Returns:
-            Optional[str]: The source path of the template.
+            Path to the template file if found, None otherwise
         """
     def discover_templates(self) -> None:
-        """Discover templates in the specified directories."""
+        """Scan template directories and load available templates.
+        This refreshes the template cache, finding any new or modified templates.
+        """
     def render_template(self, name: str, data: Dict[str, Any]) -> str:
-        """Render a template with the given name and data.
+        """Render a template with context data.
         Args:
-            name (str): The name of the template to render.
-            data (Dict[str, Any]): The data to pass to the template.
+            name: Template name (without extension)
+            data: Context dictionary to provide variables to the template
         Returns:
-            str: The rendered template.
+            Rendered template content as string
+        Raises:
+            RuntimeError: If template rendering fails
         """
 def blake3_hash(content: bytes) -> str:
-    """Calculate the BLAKE3 hash of the given data.
+    """Calculate the BLAKE3 cryptographic hash of data.
     Args:
-        content (bytes): The data to hash.
+        content: Bytes to be hashed
     Returns:
-        str: The BLAKE3 hash of the data.
+        Hex-encoded BLAKE3 hash string
     """
+class BibManager:
+    """BibTeX bibliography manager for parsing and querying citation data."""
+    def __init__(self, path: str) -> None:
+        """Initialize the bibliography manager.
+        Args:
+            path: Path to BibTeX (.bib) file to load
+        Raises:
+            RuntimeError: If file cannot be read or parsed
+        """
+    def get_cite_key(self, title: str) -> Optional[str]:
+        """Find citation key by exact title match.
+        Args:
+            title: Full title to search for (case-insensitive)
+        Returns:
+            Citation key if exact match found, None otherwise
+        """
+    def get_cite_key_fuzzy(self, query: str) -> Optional[str]:
+        """Find best matching citation using fuzzy text search.
+        Args:
+            query: Search term to find in bibliography entries
+        Returns:
+            Citation key of best matching entry, or None if no good match
+        Notes:
+            Uses nucleo_matcher for high-quality fuzzy text searching
+            See: https://crates.io/crates/nucleo-matcher
+        """

fabricatio/_rust_instances.py CHANGED

@@ -1,3 +1,5 @@
+"""Some necessary instances."""
 from fabricatio._rust import TemplateManager
 from fabricatio.config import configs

fabricatio/actions/article.py CHANGED

@@ -2,11 +2,12 @@
 from os import PathLike
 from pathlib import Path
-from typing import Callable, List
+from typing import Callable, List, Optional
+from fabricatio.fs import safe_text_read
 from fabricatio.journal import logger
 from fabricatio.models.action import Action
-from fabricatio.models.extra import ArticleEssence
+from fabricatio.models.extra import ArticleEssence, ArticleOutline, ArticleProposal
 from fabricatio.models.task import Task
@@ -18,11 +19,6 @@ class ExtractArticleEssence(Action):
         which is converted from pdf files using `magic-pdf` from the `MinerU` project, see https://github.com/opendatalab/MinerU
     """
-    name: str = "extract article essence"
-    """The name of the action."""
-    description: str = "Extract the essence of article(s) from the paths specified in the task dependencies."
-    """The description of the action."""
     output_key: str = "article_essence"
     """The key of the output data."""
@@ -31,13 +27,9 @@ class ExtractArticleEssence(Action):
         task_input: Task,
         reader: Callable[[P], str] = lambda p: Path(p).read_text(encoding="utf-8"),
         **_,
-    ) -> List[ArticleEssence]:
-        if not await self.ajudge(
-            f"= Task\n{task_input.briefing}\n\n\n= Role\n{self.briefing}",
-            affirm_case="The task does not violate the role, and could be approved since the file dependencies are specified.",
-            deny_case="The task does violate the role, and could not be approved.",
-        ):
-            logger.info(err := "Task not approved.")
+    ) -> Optional[List[ArticleEssence]]:
+        if not task_input.dependencies:
+            logger.info(err := "Task not approved, since no dependencies are provided.")
             raise RuntimeError(err)
         # trim the references
@@ -47,3 +39,43 @@ class ExtractArticleEssence(Action):
             contents,
             system_message=f"# your personal briefing: \n{self.briefing}",
         )
+class GenerateArticleProposal(Action):
+    """Generate an outline for the article based on the extracted essence."""
+    output_key: str = "article_proposal"
+    """The key of the output data."""
+    async def _execute(
+        self,
+        task_input: Task,
+        **_,
+    ) -> Optional[ArticleProposal]:
+        input_path = await self.awhich_pathstr(
+            f"{task_input.briefing}\nExtract the path of file, which contains the article briefing that I need to read."
+        )
+        return await self.propose(
+            ArticleProposal,
+            safe_text_read(input_path),
+            system_message=f"# your personal briefing: \n{self.briefing}",
+        )
+class GenerateOutline(Action):
+    """Generate the article based on the outline."""
+    output_key: str = "article"
+    """The key of the output data."""
+    async def _execute(
+        self,
+        article_proposal: ArticleProposal,
+        **_,
+    ) -> Optional[ArticleOutline]:
+        return await self.propose(
+            ArticleOutline,
+            article_proposal.display(),
+            system_message=f"# your personal briefing: \n{self.briefing}",
+        )

fabricatio/actions/output.py ADDED

@@ -0,0 +1,21 @@
+"""Dump the finalized output to a file."""
+from typing import Unpack
+from fabricatio.models.action import Action
+from fabricatio.models.generic import FinalizedDumpAble
+from fabricatio.models.task import Task
+class DumpFinalizedOutput(Action):
+    """Dump the finalized output to a file."""
+    output_key: str = "dump_path"
+    async def _execute(self, task_input: Task, to_dump: FinalizedDumpAble, **cxt: Unpack) -> str:
+        dump_path = await self.awhich_pathstr(
+            f"{task_input.briefing}\n\nExtract a single path of the file, to which I will dump the data."
+        )
+        to_dump.finalized_dump_to(dump_path)
+        return dump_path

fabricatio/actions/rag.py CHANGED

@@ -14,7 +14,7 @@ class InjectToDB(Action, RAG):
     async def _execute[T: PrepareVectorization](
         self, to_inject: T | List[T], collection_name: Optional[str] = "my_collection", **cxt: Unpack
-    ) -> str:
+    ) -> Optional[str]:
         if not isinstance(to_inject, list):
             to_inject = [to_inject]

fabricatio/capabilities/propose.py CHANGED

@@ -1,37 +1,37 @@
 """A module for the task capabilities of the Fabricatio library."""
-from typing import List, Type, Unpack, overload
+from typing import List, Optional, Type, Unpack, overload
 from fabricatio.models.generic import ProposedAble
-from fabricatio.models.kwargs_types import GenerateKwargs
+from fabricatio.models.kwargs_types import ValidateKwargs
 from fabricatio.models.usages import LLMUsage
-class Propose[M: ProposedAble](LLMUsage):
+class Propose(LLMUsage):
     """A class that proposes an Obj based on a prompt."""
     @overload
-    async def propose(
+    async def propose[M: ProposedAble](
         self,
         cls: Type[M],
         prompt: List[str],
-        **kwargs: Unpack[GenerateKwargs],
-    ) -> List[M]: ...
+        **kwargs: Unpack[ValidateKwargs[M]],
+    ) -> Optional[List[M]]: ...
     @overload
-    async def propose(
+    async def propose[M: ProposedAble](
         self,
         cls: Type[M],
         prompt: str,
-        **kwargs: Unpack[GenerateKwargs],
-    ) -> M: ...
+        **kwargs: Unpack[ValidateKwargs[M]],
+    ) -> Optional[M]: ...
-    async def propose(
+    async def propose[M: ProposedAble](
         self,
         cls: Type[M],
         prompt: List[str] | str,
-        **kwargs: Unpack[GenerateKwargs],
-    ) -> List[M] | M:
+        **kwargs: Unpack[ValidateKwargs[M]],
+    ) -> Optional[List[M] | M]:
         """Asynchronously proposes a task based on a given prompt and parameters.
         Parameters:
@@ -42,14 +42,8 @@ class Propose[M: ProposedAble](LLMUsage):
         Returns:
             A Task object based on the proposal result.
         """
-        if isinstance(prompt, str):
-            return await self.aask_validate(
-                question=cls.create_json_prompt(prompt),
-                validator=cls.instantiate_from_string,
-                **kwargs,
-            )
-        return await self.aask_validate_batch(
-            questions=[cls.create_json_prompt(p) for p in prompt],
+        return await self.aask_validate(
+            question=cls.create_json_prompt(prompt),
             validator=cls.instantiate_from_string,
             **kwargs,
         )

fabricatio/capabilities/rag.py CHANGED

@@ -8,15 +8,21 @@ from functools import lru_cache
 from operator import itemgetter
 from os import PathLike
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Self, Union, Unpack, overload
+from typing import Any, Callable, Dict, List, Optional, Self, Union, Unpack, cast, overload
 from fabricatio._rust_instances import template_manager
 from fabricatio.config import configs
 from fabricatio.journal import logger
-from fabricatio.models.kwargs_types import CollectionSimpleConfigKwargs, EmbeddingKwargs, FetchKwargs, LLMKwargs
+from fabricatio.models.kwargs_types import (
+    ChooseKwargs,
+    CollectionSimpleConfigKwargs,
+    EmbeddingKwargs,
+    FetchKwargs,
+    LLMKwargs,
+)
 from fabricatio.models.usages import EmbeddingUsage
 from fabricatio.models.utils import MilvusData
-from more_itertools.recipes import flatten
+from more_itertools.recipes import flatten, unique
 from pydantic import Field, PrivateAttr
@@ -105,9 +111,9 @@ class RAG(EmbeddingUsage):
             create (bool): Whether to create the collection if it does not exist.
             **kwargs (Unpack[CollectionSimpleConfigKwargs]): Additional keyword arguments for collection configuration.
         """
-        if create and collection_name and not self._client.has_collection(collection_name):
+        if create and collection_name and self.client.has_collection(collection_name):
             kwargs["dimension"] = kwargs.get("dimension") or self.milvus_dimensions or configs.rag.milvus_dimensions
-            self._client.create_collection(collection_name, auto_id=True, **kwargs)
+            self.client.create_collection(collection_name, auto_id=True, **kwargs)
             logger.info(f"Creating collection {collection_name}")
         self.target_collection = collection_name
@@ -146,15 +152,17 @@ class RAG(EmbeddingUsage):
             Self: The current instance, allowing for method chaining.
         """
         if isinstance(data, MilvusData):
-            data = data.prepare_insertion()
-        if isinstance(data, list):
-            data = [d.prepare_insertion() if isinstance(d, MilvusData) else d for d in data]
+            prepared_data = data.prepare_insertion()
+        elif isinstance(data, list):
+            prepared_data = [d.prepare_insertion() if isinstance(d, MilvusData) else d for d in data]
+        else:
+            raise TypeError(f"Expected MilvusData or list of MilvusData, got {type(data)}")
         c_name = collection_name or self.safe_target_collection
-        self._client.insert(c_name, data)
+        self.client.insert(c_name, prepared_data)
         if flush:
             logger.debug(f"Flushing collection {c_name}")
-            self._client.flush(c_name)
+            self.client.flush(c_name)
         return self
     async def consume_file(
@@ -190,14 +198,14 @@ class RAG(EmbeddingUsage):
         self.add_document(await self.pack(text), collection_name or self.safe_target_collection, flush=True)
         return self
-    async def afetch_document(
+    async def afetch_document[V: (int, str, float, bytes)](
         self,
         vecs: List[List[float]],
         desired_fields: List[str] | str,
         collection_name: Optional[str] = None,
         similarity_threshold: float = 0.37,
         result_per_query: int = 10,
-    ) -> List[Dict[str, Any]] | List[Any]:
+    ) -> List[Dict[str, Any]] | List[V]:
         """Fetch data from the collection.
         Args:
@@ -211,7 +219,7 @@ class RAG(EmbeddingUsage):
             List[Dict[str, Any]] | List[Any]: The retrieved data.
         """
         # Step 1: Search for vectors
-        search_results = self._client.search(
+        search_results = self.client.search(
             collection_name or self.safe_target_collection,
             vecs,
             search_params={"radius": similarity_threshold},
@@ -221,9 +229,9 @@ class RAG(EmbeddingUsage):
         # Step 2: Flatten the search results
         flattened_results = flatten(search_results)
+        unique_results = unique(flattened_results, key=itemgetter("id"))
         # Step 3: Sort by distance (descending)
-        sorted_results = sorted(flattened_results, key=itemgetter("distance"), reverse=True)
+        sorted_results = sorted(unique_results, key=itemgetter("distance"), reverse=True)
         logger.debug(f"Searched similarities: {[t['distance'] for t in sorted_results]}")
         # Step 4: Extract the entities
@@ -231,12 +239,11 @@ class RAG(EmbeddingUsage):
         if isinstance(desired_fields, list):
             return resp
-        return [r.get(desired_fields) for r in resp]
+        return [r.get(desired_fields) for r in resp]  # extract the single field as list
     async def aretrieve(
         self,
         query: List[str] | str,
-        collection_name: Optional[str] = None,
         final_limit: int = 20,
         **kwargs: Unpack[FetchKwargs],
     ) -> List[str]:
@@ -244,7 +251,6 @@ class RAG(EmbeddingUsage):
         Args:
             query (List[str] | str): The query to be used for retrieval.
-            collection_name (Optional[str]): The name of the collection. If not provided, the currently viewed collection is used.
             final_limit (int): The final limit on the number of results to return.
             **kwargs (Unpack[FetchKwargs]): Additional keyword arguments for retrieval.
@@ -253,19 +259,19 @@ class RAG(EmbeddingUsage):
         """
         if isinstance(query, str):
             query = [query]
-        return (
+        return cast(
+            List[str],
             await self.afetch_document(
                 vecs=(await self.vectorize(query)),
                 desired_fields="text",
-                collection_name=collection_name,
                 **kwargs,
-            )
+            ),
         )[:final_limit]
     async def aask_retrieved(
         self,
-        question: str | List[str],
-        query: List[str] | str,
+        question: str,
+        query: Optional[List[str] | str] = None,
         collection_name: Optional[str] = None,
         extra_system_message: str = "",
         result_per_query: int = 10,
@@ -279,7 +285,7 @@ class RAG(EmbeddingUsage):
         specified question using the retrieved documents as context.
         Args:
-            question (str | List[str]): The question or list of questions to be asked.
+            question (str): The question to be asked.
             query (List[str] | str): The query or list of queries used for document retrieval.
             collection_name (Optional[str]): The name of the collection to retrieve documents from.
                                               If not provided, the currently viewed collection is used.
@@ -293,9 +299,9 @@ class RAG(EmbeddingUsage):
             str: A string response generated after asking with the context of retrieved documents.
         """
         docs = await self.aretrieve(
-            query,
-            collection_name,
+            query or question,
             final_limit,
+            collection_name=collection_name,
             result_per_query=result_per_query,
             similarity_threshold=similarity_threshold,
         )
@@ -308,3 +314,56 @@ class RAG(EmbeddingUsage):
             f"{rendered}\n\n{extra_system_message}",
             **kwargs,
         )
+    async def arefined_query(self, question: List[str] | str, **kwargs: Unpack[ChooseKwargs]) -> List[str]:
+        """Refines the given question using a template.
+        Args:
+            question (List[str] | str): The question to be refined.
+            **kwargs (Unpack[ChooseKwargs]): Additional keyword arguments for the refinement process.
+        Returns:
+            List[str]: A list of refined questions.
+        """
+        return await self.aliststr(
+            template_manager.render_template(
+                configs.templates.refined_query_template,
+                {"question": [question] if isinstance(question, str) else question},
+            ),
+            **kwargs,
+        )
+    async def aask_refined(
+        self,
+        question: str,
+        collection_name: Optional[str] = None,
+        extra_system_message: str = "",
+        result_per_query: int = 10,
+        final_limit: int = 20,
+        similarity_threshold: float = 0.37,
+        **kwargs: Unpack[LLMKwargs],
+    ) -> str:
+        """Asks a question using a refined query based on the provided question.
+        Args:
+            question (str): The question to be asked.
+            collection_name (Optional[str]): The name of the collection to retrieve documents from.
+            extra_system_message (str): An additional system message to be included in the prompt.
+            result_per_query (int): The number of results to return per query. Default is 10.
+            final_limit (int): The maximum number of retrieved documents to consider. Default is 20.
+            similarity_threshold (float): The threshold for similarity, only results above this threshold will be returned.
+            **kwargs (Unpack[LLMKwargs]): Additional keyword arguments passed to the underlying `aask` method.
+        Returns:
+            str: A string response generated after asking with the refined question.
+        """
+        return await self.aask_retrieved(
+            question,
+            await self.arefined_query(question, **kwargs),
+            collection_name=collection_name,
+            extra_system_message=extra_system_message,
+            result_per_query=result_per_query,
+            final_limit=final_limit,
+            similarity_threshold=similarity_threshold,
+            **kwargs,
+        )