PyPI - fabricatio - Versions diffs - 0.2.9.dev4__cp312-cp312-win_amd64.whl → 0.2.10.dev1__cp312-cp312-win_amd64.whl - Mend

fabricatio 0.2.9.dev4__cp312-cp312-win_amd64.whl → 0.2.10.dev1__cp312-cp312-win_amd64.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

fabricatio/actions/article.py +11 -107
fabricatio/actions/article_rag.py +33 -2
fabricatio/actions/rag.py +40 -18
fabricatio/capabilities/check.py +2 -1
fabricatio/capabilities/rag.py +41 -231
fabricatio/constants.py +20 -0
fabricatio/decorators.py +23 -0
fabricatio/models/adv_kwargs_types.py +35 -0
fabricatio/models/events.py +6 -6
fabricatio/models/extra/advanced_judge.py +2 -2
fabricatio/models/extra/aricle_rag.py +120 -0
fabricatio/models/extra/article_base.py +2 -186
fabricatio/models/extra/article_essence.py +8 -7
fabricatio/models/extra/article_main.py +12 -107
fabricatio/models/extra/problem.py +12 -17
fabricatio/models/extra/rag.py +98 -0
fabricatio/models/extra/rule.py +1 -2
fabricatio/models/generic.py +19 -11
fabricatio/models/kwargs_types.py +6 -36
fabricatio/models/task.py +3 -3
fabricatio/models/usages.py +73 -5
fabricatio/rust.cp312-win_amd64.pyd +0 -0
fabricatio/rust.pyi +35 -6
fabricatio/utils.py +14 -1
{fabricatio-0.2.9.dev4.data → fabricatio-0.2.10.dev1.data}/scripts/tdown.exe +0 -0
{fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dev1.dist-info}/METADATA +1 -4
{fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dev1.dist-info}/RECORD +29 -27
fabricatio/models/utils.py +0 -148
{fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dev1.dist-info}/WHEEL +0 -0
{fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dev1.dist-info}/licenses/LICENSE +0 -0

fabricatio/actions/article.py CHANGED Viewed

@@ -4,6 +4,7 @@ from asyncio import gather
 from pathlib import Path
 from typing import Callable, List, Optional
+from fabricatio.rust import BibManager, detect_language
 from more_itertools import filter_map
 from fabricatio.capabilities.censor import Censor
@@ -11,14 +12,12 @@ from fabricatio.capabilities.propose import Propose
 from fabricatio.fs import safe_text_read
 from fabricatio.journal import logger
 from fabricatio.models.action import Action
-from fabricatio.models.extra.article_base import SubSectionBase
 from fabricatio.models.extra.article_essence import ArticleEssence
 from fabricatio.models.extra.article_main import Article
 from fabricatio.models.extra.article_outline import ArticleOutline
 from fabricatio.models.extra.article_proposal import ArticleProposal
 from fabricatio.models.extra.rule import RuleSet
 from fabricatio.models.task import Task
-from fabricatio.rust import BibManager, detect_language
 from fabricatio.utils import ok
@@ -79,7 +78,7 @@ class FixArticleEssence(Action):
         out = []
         count = 0
         for a in article_essence:
-            if key := (bib_mgr.get_cite_key(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
+            if key := (bib_mgr.get_cite_key_by_title(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
                 a.title = bib_mgr.get_title_by_key(key) or a.title
                 a.authors = bib_mgr.get_author_by_key(key) or a.authors
                 a.publication_year = bib_mgr.get_year_by_key(key) or a.publication_year
@@ -142,11 +141,17 @@ class GenerateInitialOutline(Action, Propose):
         article_proposal: ArticleProposal,
         **_,
     ) -> Optional[ArticleOutline]:
+        raw_outline = await self.aask(
+            f"{(article_proposal.as_prompt())}\n\nNote that you should use `{article_proposal.language}` to write the `ArticleOutline`\n"
+            f"Design each chapter of a proper and academic and ready for release manner.\n"
+            f"You Must make sure every chapter have sections, and every section have subsections.\n"
+            f"Make the chapter and sections and subsections bing divided into a specific enough article component.",
+        )
         return ok(
             await self.propose(
                 ArticleOutline,
-                f"{(article_proposal.as_prompt())}\n\nNote that you should use `{article_proposal.language}` to write the `ArticleOutline`\n"
-                f"You Must make sure every chapter have sections, and every section have subsections.",
+                f"{raw_outline}\n\n\n\noutline provided above is the outline i need to extract to a JSON,",
             ),
             "Could not generate the initial outline.",
         ).update_ref(article_proposal)
@@ -178,7 +183,7 @@ class FixIntrospectedErrors(Action, Censor):
                 await self.censor_obj(
                     article_outline,
                     ruleset=ok(intro_fix_ruleset or self.ruleset, "No ruleset provided"),
-                    reference=f"{article_outline.as_prompt()}\n # Fatal Error of the Original Article Outline\n{pack}",
+                    reference=f"{article_outline.display()}\n # Fatal Error of the Original Article Outline\n{pack}",
                 ),
                 "Could not correct the component.",
             ).update_ref(origin)
@@ -191,107 +196,6 @@ class FixIntrospectedErrors(Action, Censor):
         return article_outline
-class FixIllegalReferences(Action, Censor):
-    """Fix illegal references in the article outline."""
-    output_key: str = "illegal_references_fixed_outline"
-    """The key of the output data."""
-    ruleset: Optional[RuleSet] = None
-    """Ruleset to use to fix the illegal references."""
-    max_error_count: Optional[int] = None
-    """The maximum number of errors to fix."""
-    async def _execute(
-        self,
-        article_outline: ArticleOutline,
-        ref_fix_ruleset: Optional[RuleSet] = None,
-        **_,
-    ) -> Optional[ArticleOutline]:
-        counter = 0
-        while pack := article_outline.find_illegal_ref(gather_identical=True):
-            logger.info(f"Found {counter}th illegal references")
-            ref_seq, err = ok(pack)
-            logger.warning(f"Found illegal referring error: {err}")
-            new = ok(
-                await self.censor_obj(
-                    ref_seq[0],
-                    ruleset=ok(ref_fix_ruleset or self.ruleset, "No ruleset provided"),
-                    reference=f"{article_outline.as_prompt()}\n# Some Basic errors found that need to be fixed\n{err}",
-                ),
-                "Could not correct the component",
-            )
-            for r in ref_seq:
-                r.update_from(new)
-            if self.max_error_count and counter > self.max_error_count:
-                logger.warning("Max error count reached, stopping.")
-                break
-            counter += 1
-        return article_outline
-class TweakOutlineForwardRef(Action, Censor):
-    """Tweak the forward references in the article outline.
-    Ensures that the conclusions of the current chapter effectively support the analysis of subsequent chapters.
-    """
-    output_key: str = "article_outline_fw_ref_checked"
-    ruleset: Optional[RuleSet] = None
-    """Ruleset to use to fix the illegal references."""
-    async def _execute(
-        self, article_outline: ArticleOutline, ref_twk_ruleset: Optional[RuleSet] = None, **cxt
-    ) -> ArticleOutline:
-        return await self._inner(
-            article_outline,
-            ruleset=ok(ref_twk_ruleset or self.ruleset, "No ruleset provided"),
-            field_name="support_to",
-        )
-    async def _inner(self, article_outline: ArticleOutline, ruleset: RuleSet, field_name: str) -> ArticleOutline:
-        await gather(
-            *[self._loop(a[-1], article_outline, field_name, ruleset) for a in article_outline.iter_subsections()],
-        )
-        return article_outline
-    async def _loop(
-        self, a: SubSectionBase, article_outline: ArticleOutline, field_name: str, ruleset: RuleSet
-    ) -> None:
-        if judge := await self.evidently_judge(
-            f"{article_outline.as_prompt()}\n\n{a.display()}\n"
-            f"Does the `{a.__class__.__name__}`'s `{field_name}` field need to be extended or tweaked?"
-        ):
-            await self.censor_obj_inplace(
-                a,
-                ruleset=ruleset,
-                reference=f"{article_outline.as_prompt()}\n"
-                f"The Article component titled `{a.title}` whose `{field_name}` field needs to be extended or tweaked.\n"
-                f"# Judgement\n{judge.display()}",
-            )
-class TweakOutlineBackwardRef(TweakOutlineForwardRef):
-    """Tweak the backward references in the article outline.
-    Ensures that the prerequisites of the current chapter are correctly referenced in the `depend_on` field.
-    """
-    output_key: str = "article_outline_bw_ref_checked"
-    ruleset: Optional[RuleSet] = None
-    async def _execute(
-        self, article_outline: ArticleOutline, ref_twk_ruleset: Optional[RuleSet] = None, **cxt
-    ) -> ArticleOutline:
-        return await self._inner(
-            article_outline,
-            ruleset=ok(ref_twk_ruleset or self.ruleset, "No ruleset provided"),
-            field_name="depend_on",
-        )
 class GenerateArticle(Action, Censor):
     """Generate the article based on the outline."""

fabricatio/actions/article_rag.py CHANGED Viewed

@@ -1,11 +1,15 @@
 """A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
 from asyncio import gather
-from typing import Optional
+from pathlib import Path
+from typing import List, Optional
+from fabricatio import BibManager
 from fabricatio.capabilities.censor import Censor
 from fabricatio.capabilities.rag import RAG
 from fabricatio.models.action import Action
+from fabricatio.models.extra.aricle_rag import ArticleChunk
+from fabricatio.models.extra.article_essence import ArticleEssence
 from fabricatio.models.extra.article_main import Article, ArticleSubsection
 from fabricatio.models.extra.rule import RuleSet
 from fabricatio.utils import ok
@@ -97,9 +101,36 @@ class TweakArticleRAG(Action, RAG, Censor):
         await self.censor_obj_inplace(
             subsec,
             ruleset=ruleset,
-            reference=f"{await self.aretrieve_compact(refind_q, final_limit=self.ref_limit)}\n\n"
+            reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, final_limit=self.ref_limit))}\n\n"
             f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
             f"You should Always use `{subsec.language}` as written language, "
             f"which is the original language of the `{subsec.title}`. "
             f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
         )
+class ChunkArticle(Action):
+    """Chunk an article into smaller chunks."""
+    output_key:str = "article_chunks"
+    """The key used to store the output of the action."""
+    max_chunk_size: Optional[int] = None
+    """The maximum size of each chunk."""
+    max_overlapping_rate: Optional[float] = None
+    """The maximum overlapping rate between chunks."""
+    async def _execute(
+        self,
+        article_path: str | Path,
+        bib_manager: BibManager,
+        max_chunk_size: Optional[int] = None,
+        max_overlapping_rate: Optional[float] = None,
+        **_,
+    ) -> List[ArticleChunk]:
+        return ArticleChunk.from_file(
+            article_path,
+            bib_manager,
+            max_chunk_size=ok(max_chunk_size or self.max_chunk_size, "No max_chunk_size provided!"),
+            max_overlapping_rate=ok(max_overlapping_rate or self.max_overlapping_rate, "No max_overlapping_rate provided!"),
+        )

fabricatio/actions/rag.py CHANGED Viewed

@@ -5,34 +5,56 @@ from typing import List, Optional
 from questionary import text
 from fabricatio.capabilities.rag import RAG
+from fabricatio.config import configs
 from fabricatio.journal import logger
 from fabricatio.models.action import Action
-from fabricatio.models.generic import Vectorizable
+from fabricatio.models.extra.rag import MilvusClassicModel, MilvusDataBase
 from fabricatio.models.task import Task
+from fabricatio.utils import ok
 class InjectToDB(Action, RAG):
     """Inject data into the database."""
     output_key: str = "collection_name"
+    collection_name: str = "my_collection"
+    """The name of the collection to inject data into."""
-    async def _execute[T: Vectorizable](
-        self, to_inject: Optional[T] | List[Optional[T]], collection_name: str = "my_collection",override_inject:bool=False, **_
+    async def _execute[T: MilvusDataBase](
+        self, to_inject: Optional[T] | List[Optional[T]], override_inject: bool = False, **_
     ) -> Optional[str]:
+        from pymilvus.milvus_client import IndexParams
+        if to_inject is None:
+            return None
         if not isinstance(to_inject, list):
             to_inject = [to_inject]
-        logger.info(f"Injecting {len(to_inject)} items into the collection '{collection_name}'")
+        if not (seq := [t for t in to_inject if t is not None]):  # filter out None
+            return None
+        logger.info(f"Injecting {len(seq)} items into the collection '{self.collection_name}'")
         if override_inject:
-            self.check_client().client.drop_collection(collection_name)
-        await self.view(collection_name, create=True).consume_string(
-            [
-                t.prepare_vectorization(self.embedding_max_sequence_length)
-                for t in to_inject
-                if isinstance(t, Vectorizable)
-            ],
-        )
-        return collection_name
+            self.check_client().client.drop_collection(self.collection_name)
+        await self.view(
+            self.collection_name,
+            create=True,
+            schema=seq[0].as_milvus_schema(
+                ok(
+                    self.milvus_dimensions
+                    or configs.rag.milvus_dimensions
+                    or self.embedding_dimensions
+                    or configs.embedding.dimensions
+                ),
+            ),
+            index_params=IndexParams(
+                seq[0].vector_field_name,
+                index_name=seq[0].vector_field_name,
+                index_type=seq[0].index_type,
+                metric_type=seq[0].metric_type,
+            ),
+        ).add_document(seq, flush=True)
+        return self.collection_name
 class RAGTalk(Action, RAG):
@@ -62,10 +84,10 @@ class RAGTalk(Action, RAG):
                 user_say = await text("User: ").ask_async()
                 if user_say is None:
                     break
-                gpt_say = await self.aask_retrieved(
-                    user_say,
-                    user_say,
-                    extra_system_message=f"You have to answer to user obeying task assigned to you:\n{task_input.briefing}",
+                ret: List[MilvusClassicModel] = await self.aretrieve(user_say, document_model=MilvusClassicModel)
+                gpt_say = await self.aask(
+                    user_say, system_message="\n".join(m.text for m in ret) + "\nYou can refer facts provided above."
                 )
                 print(f"GPT: {gpt_say}")  # noqa: T201
                 counter += 1

fabricatio/capabilities/check.py CHANGED Viewed

@@ -104,7 +104,8 @@ class Check(AdvancedJudge, Propose):
             - Proposes Improvement only when violation is confirmed
         """
         if judge := await self.evidently_judge(
-            f"# Content to exam\n{input_text}\n\n# Rule Must to follow\n{rule.display()}\nDoes `Content to exam` provided above violate the `Rule Must to follow` provided above?",
+            f"# Content to exam\n{input_text}\n\n# Rule Must to follow\n{rule.display()}\nDoes `Content to exam` provided above violate the `{rule.name}` provided above?"
+            f"should I take some measure to fix that violation? true for I do need, false for I don't need.",
             **override_kwargs(kwargs, default=None),
         ):
             logger.info(f"Rule `{rule.name}` violated: \n{judge.display()}")