PyPI - fabricatio - Versions diffs - 0.3.14.dev7__cp312-cp312-manylinux_2_34_x86_64.whl → 0.3.15.dev5__cp312-cp312-manylinux_2_34_x86_64.whl - Mend

fabricatio 0.3.14.dev7__cp312-cp312-manylinux_2_34_x86_64.whl → 0.3.15.dev5__cp312-cp312-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

fabricatio/actions/article.py +115 -19
fabricatio/actions/article_rag.py +52 -52
fabricatio/actions/output.py +21 -22
fabricatio/decorators.py +2 -0
fabricatio/models/extra/aricle_rag.py +4 -5
fabricatio/models/extra/article_base.py +101 -35
fabricatio/models/extra/article_essence.py +1 -4
fabricatio/models/extra/article_main.py +12 -8
fabricatio/models/extra/article_outline.py +1 -2
fabricatio/models/extra/article_proposal.py +1 -1
fabricatio/models/extra/rule.py +1 -2
fabricatio/models/generic.py +93 -1
fabricatio/models/role.py +87 -26
fabricatio/rust.cpython-312-x86_64-linux-gnu.so +0 -0
fabricatio/rust.pyi +20 -61
fabricatio-0.3.15.dev5.data/scripts/tdown +0 -0
fabricatio-0.3.15.dev5.data/scripts/ttm +0 -0
{fabricatio-0.3.14.dev7.dist-info → fabricatio-0.3.15.dev5.dist-info}/METADATA +3 -1
{fabricatio-0.3.14.dev7.dist-info → fabricatio-0.3.15.dev5.dist-info}/RECORD +21 -22
{fabricatio-0.3.14.dev7.dist-info → fabricatio-0.3.15.dev5.dist-info}/WHEEL +1 -1
fabricatio/capabilities/persist.py +0 -103
fabricatio-0.3.14.dev7.data/scripts/tdown +0 -0
fabricatio-0.3.14.dev7.data/scripts/ttm +0 -0
{fabricatio-0.3.14.dev7.dist-info → fabricatio-0.3.15.dev5.dist-info}/licenses/LICENSE +0 -0

fabricatio/actions/article.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from asyncio import gather
 from pathlib import Path
-from typing import Callable, List, Optional
+from typing import Callable, ClassVar, List, Optional
 from more_itertools import filter_map
 from pydantic import Field
@@ -15,14 +15,14 @@ from fabricatio.fs import dump_text, safe_text_read
 from fabricatio.journal import logger
 from fabricatio.models.action import Action
 from fabricatio.models.extra.article_essence import ArticleEssence
-from fabricatio.models.extra.article_main import Article
+from fabricatio.models.extra.article_main import Article, ArticleChapter, ArticleSubsection
 from fabricatio.models.extra.article_outline import ArticleOutline
 from fabricatio.models.extra.article_proposal import ArticleProposal
 from fabricatio.models.extra.rule import RuleSet
 from fabricatio.models.kwargs_types import ValidateKwargs
 from fabricatio.models.task import Task
 from fabricatio.models.usages import LLMUsage
-from fabricatio.rust import CONFIG, TEMPLATE_MANAGER, BibManager, detect_language
+from fabricatio.rust import CONFIG, TEMPLATE_MANAGER, BibManager, detect_language, word_count
 from fabricatio.utils import ok, wrapp_in_block
@@ -277,43 +277,139 @@ class LoadArticle(Action):
 class WriteChapterSummary(Action, LLMUsage):
     """Write the chapter summary."""
-    output_key: str = "chapter_summaries"
+    ctx_override: ClassVar[bool] = True
     paragraph_count: int = 1
+    """The number of paragraphs to generate in the chapter summary."""
-    summary_word_count: int = 200
+    summary_word_count: int = 120
+    """The number of words to use in each chapter summary."""
+    output_key: str = "summarized_article"
+    """The key under which the summarized article will be stored in the output."""
     summary_title: str = "Chapter Summary"
-    write_to: Optional[Path] = None
+    """The title to be used for the generated chapter summary section."""
+    skip_chapters: List[str] = Field(default_factory=list)
+    """A list of chapter titles to skip during summary generation."""
+    async def _execute(self, article_path: Path, **cxt) -> Article:
+        article = Article.from_article_file(article_path, article_path.stem)
+        chaps = [c for c in article.chapters if c.title not in self.skip_chapters]
+        retained_chapters = []
+        # Count chapters before filtering based on section presence,
+        # chaps at this point has already been filtered by self.skip_chapters
+        initial_chaps_for_summary_step_count = len(chaps)
+        for chapter_candidate in chaps:
+            if chapter_candidate.sections:  # Check if the sections list is non-empty
+                retained_chapters.append(chapter_candidate)
+            else:
+                # Log a warning for each chapter skipped due to lack of sections
+                logger.warning(
+                    f"Chapter '{chapter_candidate.title}' has no sections and will be skipped for summary generation."
+                )
+        chaps = retained_chapters  # Update chaps to only include chapters with sections
-    async def _execute(self, article: Article, write_to: Optional[Path] = None, **cxt) -> List[str]:
-        logger.info(";".join(a.title for a in article.chapters))
+        # If chaps is now empty, but there were chapters to consider at the start of this step,
+        # raise an error instead of continuing with nothing to summarize.
+        if not chaps and initial_chaps_for_summary_step_count > 0:
+            raise ValueError("No chapters with sections were found. Please check your input data.")
+        # This line was part of the original selection.
+        # It will now log the titles of the chapters that are actually being processed (those with sections).
+        # If 'chaps' is empty, this will result in logger.info(""), which is acceptable.
+        logger.info(";".join(a.title for a in chaps))
         ret = [
-            f"== {self.summary_title}\n{raw}"
+            ArticleSubsection.from_typst_code(self.summary_title, raw)
             for raw in (
                 await self.aask(
                     TEMPLATE_MANAGER.render_template(
                         CONFIG.templates.chap_summary_template,
                         [
                             {
-                                "chapter": a.to_typst_code(),
-                                "title": a.title,
-                                "language": a.language,
+                                "chapter": c.to_typst_code(),
+                                "title": c.title,
+                                "language": c.language,
                                 "summary_word_count": self.summary_word_count,
                                 "paragraph_count": self.paragraph_count,
                             }
-                            for a in article.chapters
+                            for c in chaps
                         ],
                     )
                 )
             )
         ]
-        if (to := (self.write_to or write_to)) is not None:
-            dump_text(
-                to,
-                "\n\n\n".join(f"//{a.title}\n\n{s}" for a, s in zip(article.chapters, ret, strict=True)),
+        for c, n in zip(chaps, ret, strict=True):
+            c: ArticleChapter
+            n: ArticleSubsection
+            if c.sections[-1].title == self.summary_title:
+                logger.debug(f"Removing old summary `{self.summary_title}` at {c.title}")
+                c.sections.pop()
+            c.sections[-1].subsections.append(n)
+        article.update_article_file(article_path)
+        dump_text(
+            article_path, safe_text_read(article_path).replace(f"=== {self.summary_title}", f"== {self.summary_title}")
+        )
+        return article
+class WriteResearchContentSummary(Action, LLMUsage):
+    """Write the research content summary."""
+    ctx_override: ClassVar[bool] = True
+    summary_word_count: int = 160
+    """The number of words to use in the research content summary."""
+    output_key: str = "summarized_article"
+    """The key under which the summarized article will be stored in the output."""
+    summary_title: str = "Research Content"
+    """The title to be used for the generated research content summary section."""
+    paragraph_count: int = 1
+    """The number of paragraphs to generate in the research content summary."""
+    async def _execute(self, article_path: Path, **cxt) -> Article:
+        article = Article.from_article_file(article_path, article_path.stem)
+        if not article.chapters:
+            raise ValueError("No chapters found in the article.")
+        chap_1 = article.chapters[0]
+        if not chap_1.sections:
+            raise ValueError("No sections found in the first chapter of the article.")
+        outline = article.extrac_outline()
+        suma: str = await self.aask(
+            TEMPLATE_MANAGER.render_template(
+                CONFIG.templates.research_content_summary_template,
+                {
+                    "title": outline.title,
+                    "outline": outline.to_typst_code(),
+                    "language": detect_language(self.summary_title),
+                    "summary_word_count": self.summary_word_count,
+                    "paragraph_count": self.paragraph_count,
+                },
             )
+        )
+        logger.success(
+            f"{self.summary_title}|Wordcount: {word_count(suma)}|Expected: {self.summary_word_count}\n{suma}"
+        )
+        if chap_1.sections[-1].title == self.summary_title:
+            # remove old
+            logger.debug(f"Removing old summary `{self.summary_title}`")
+            chap_1.sections.pop()
-        return ret
+        chap_1.sections[-1].subsections.append(ArticleSubsection.from_typst_code(self.summary_title, suma))
+        article.update_article_file(article_path)
+        dump_text(
+            article_path, safe_text_read(article_path).replace(f"=== {self.summary_title}", f"== {self.summary_title}")
+        )
+        return article

fabricatio/actions/article_rag.py CHANGED Viewed

@@ -1,11 +1,11 @@
 """A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
 from asyncio import gather
 from pathlib import Path
-from pydantic import Field, PositiveInt
 from typing import ClassVar, List, Optional
+from pydantic import Field, PositiveInt
 from fabricatio.capabilities.advanced_rag import AdvancedRAG
 from fabricatio.capabilities.censor import Censor
 from fabricatio.capabilities.extract import Extract
@@ -75,11 +75,11 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
     tei_endpoint: Optional[str] = None
     async def _execute(
-            self,
-            article_outline: ArticleOutline,
-            collection_name: Optional[str] = None,
-            supervisor: Optional[bool] = None,
-            **cxt,
+        self,
+        article_outline: ArticleOutline,
+        collection_name: Optional[str] = None,
+        supervisor: Optional[bool] = None,
+        **cxt,
     ) -> Article:
         article = Article.from_outline(article_outline).update_ref(article_outline)
         self.target_collection = collection_name or self.safe_target_collection
@@ -100,12 +100,12 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
         "questionary", "`questionary` is required for supervisor mode, please install it by `fabricatio[qa]`"
     )
     async def _supervisor_inner(
-            self,
-            article: Article,
-            article_outline: ArticleOutline,
-            chap: ArticleChapter,
-            sec: ArticleSection,
-            subsec: ArticleSubsection,
+        self,
+        article: Article,
+        article_outline: ArticleOutline,
+        chap: ArticleChapter,
+        sec: ArticleSection,
+        subsec: ArticleSubsection,
     ) -> ArticleSubsection:
         from questionary import confirm, text
         from rich import print as r_print
@@ -133,12 +133,12 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
         return await self.extract_new_subsec(subsec, raw_paras, cm)
     async def _inner(
-            self,
-            article: Article,
-            article_outline: ArticleOutline,
-            chap: ArticleChapter,
-            sec: ArticleSection,
-            subsec: ArticleSubsection,
+        self,
+        article: Article,
+        article_outline: ArticleOutline,
+        chap: ArticleChapter,
+        sec: ArticleSection,
+        subsec: ArticleSubsection,
     ) -> ArticleSubsection:
         cm = CitationManager()
@@ -154,7 +154,7 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
         return await self.extract_new_subsec(subsec, raw_paras, cm)
     async def extract_new_subsec(
-            self, subsec: ArticleSubsection, raw_paras: str, cm: CitationManager
+        self, subsec: ArticleSubsection, raw_paras: str, cm: CitationManager
     ) -> ArticleSubsection:
         """Extract the new subsec."""
         new_subsec = ok(
@@ -177,14 +177,14 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
         return subsec
     async def write_raw(
-            self,
-            article: Article,
-            article_outline: ArticleOutline,
-            chap: ArticleChapter,
-            sec: ArticleSection,
-            subsec: ArticleSubsection,
-            cm: CitationManager,
-            extra_instruction: str = "",
+        self,
+        article: Article,
+        article_outline: ArticleOutline,
+        chap: ArticleChapter,
+        sec: ArticleSection,
+        subsec: ArticleSubsection,
+        cm: CitationManager,
+        extra_instruction: str = "",
     ) -> str:
         """Write the raw paragraphs of the subsec."""
         return await self.aask(
@@ -200,14 +200,14 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
         )
     async def search_database(
-            self,
-            article: Article,
-            article_outline: ArticleOutline,
-            chap: ArticleChapter,
-            sec: ArticleSection,
-            subsec: ArticleSubsection,
-            cm: CitationManager,
-            extra_instruction: str = "",
+        self,
+        article: Article,
+        article_outline: ArticleOutline,
+        chap: ArticleChapter,
+        sec: ArticleSection,
+        subsec: ArticleSubsection,
+        cm: CitationManager,
+        extra_instruction: str = "",
     ) -> None:
         """Search database for related references."""
         search_req = (
@@ -312,12 +312,12 @@ class TweakArticleRAG(Action, RAG, Censor):
     """The limit of references to be retrieved"""
     async def _execute(
-            self,
-            article: Article,
-            collection_name: str = "article_essence",
-            twk_rag_ruleset: Optional[RuleSet] = None,
-            parallel: bool = False,
-            **cxt,
+        self,
+        article: Article,
+        collection_name: str = "article_essence",
+        twk_rag_ruleset: Optional[RuleSet] = None,
+        parallel: bool = False,
+        **cxt,
     ) -> Article:
         """Write an article based on the provided outline.
@@ -372,10 +372,10 @@ class TweakArticleRAG(Action, RAG, Censor):
             subsec,
             ruleset=ruleset,
             reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, max_accepted=self.ref_limit))}\n\n"
-                      f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
-                      f"You should Always use `{subsec.language}` as written language, "
-                      f"which is the original language of the `{subsec.title}`. "
-                      f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
+            f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
+            f"You should Always use `{subsec.language}` as written language, "
+            f"which is the original language of the `{subsec.title}`. "
+            f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
         )
@@ -390,12 +390,12 @@ class ChunkArticle(Action):
     """The maximum overlapping rate between chunks."""
     async def _execute(
-            self,
-            article_path: str | Path,
-            bib_manager: BibManager,
-            max_chunk_size: Optional[int] = None,
-            max_overlapping_rate: Optional[float] = None,
-            **_,
+        self,
+        article_path: str | Path,
+        bib_manager: BibManager,
+        max_chunk_size: Optional[int] = None,
+        max_overlapping_rate: Optional[float] = None,
+        **_,
     ) -> List[ArticleChunk]:
         return ArticleChunk.from_file(
             article_path,

fabricatio/actions/output.py CHANGED Viewed

@@ -3,11 +3,10 @@
 from pathlib import Path
 from typing import Any, Iterable, List, Mapping, Optional, Self, Sequence, Type
-from fabricatio.capabilities.persist import PersistentAble
 from fabricatio.fs import dump_text
 from fabricatio.journal import logger
 from fabricatio.models.action import Action
-from fabricatio.models.generic import FinalizedDumpAble, FromMapping, FromSequence
+from fabricatio.models.generic import FinalizedDumpAble, FromMapping, FromSequence, PersistentAble
 from fabricatio.models.task import Task
 from fabricatio.models.usages import LLMUsage
 from fabricatio.rust import TEMPLATE_MANAGER
@@ -21,11 +20,11 @@ class DumpFinalizedOutput(Action, LLMUsage):
     dump_path: Optional[str] = None
     async def _execute(
-        self,
-        to_dump: FinalizedDumpAble,
-        task_input: Optional[Task] = None,
-        dump_path: Optional[str | Path] = None,
-        **_,
+            self,
+            to_dump: FinalizedDumpAble,
+            task_input: Optional[Task] = None,
+            dump_path: Optional[str | Path] = None,
+            **_,
     ) -> str:
         dump_path = Path(
             dump_path
@@ -52,11 +51,11 @@ class RenderedDump(Action, LLMUsage):
     """The template name to render the data."""
     async def _execute(
-        self,
-        to_dump: FinalizedDumpAble,
-        task_input: Optional[Task] = None,
-        dump_path: Optional[str | Path] = None,
-        **_,
+            self,
+            to_dump: FinalizedDumpAble,
+            task_input: Optional[Task] = None,
+            dump_path: Optional[str | Path] = None,
+            **_,
     ) -> str:
         dump_path = Path(
             dump_path
@@ -91,10 +90,10 @@ class PersistentAll(Action, LLMUsage):
     """Whether to remove the existing dir before dumping."""
     async def _execute(
-        self,
-        task_input: Optional[Task] = None,
-        persist_dir: Optional[str | Path] = None,
-        **cxt,
+            self,
+            task_input: Optional[Task] = None,
+            persist_dir: Optional[str | Path] = None,
+            **cxt,
     ) -> int:
         persist_dir = Path(
             persist_dir
@@ -124,7 +123,7 @@ class PersistentAll(Action, LLMUsage):
                 v.persist(final_dir)
                 count += 1
             if isinstance(v, Iterable) and any(
-                persistent_ables := (pers for pers in v if isinstance(pers, PersistentAble))
+                    persistent_ables := (pers for pers in v if isinstance(pers, PersistentAble))
             ):
                 logger.info(f"Persisting collection {k} to {final_dir}")
                 final_dir.mkdir(parents=True, exist_ok=True)
@@ -174,11 +173,11 @@ class RetrieveFromLatest[T: PersistentAble](RetrieveFromPersistent[T], FromMappi
     @classmethod
     def from_mapping(
-        cls,
-        mapping: Mapping[str, str | Path],
-        *,
-        retrieve_cls: Type[T],
-        **kwargs,
+            cls,
+            mapping: Mapping[str, str | Path],
+            *,
+            retrieve_cls: Type[T],
+            **kwargs,
     ) -> List["RetrieveFromLatest[T]"]:
         """Create a list of `RetrieveFromLatest` from the mapping."""
         return [

fabricatio/decorators.py CHANGED Viewed

@@ -235,6 +235,7 @@ def logging_exec_time[**P, R](
         @wraps(func)
         async def _async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
             start_time = time()
+            logger.debug(f"Starting execution of {func.__name__}")
             result = await func(*args, **kwargs)
             logger.debug(f"Execution time of `{func.__name__}`: {time() - start_time:.2f} s")
             return result
@@ -244,6 +245,7 @@ def logging_exec_time[**P, R](
     @wraps(func)
     def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
         start_time = time()
+        logger.debug(f"Starting execution of {func.__name__}")
         result = func(*args, **kwargs)
         logger.debug(f"Execution time of {func.__name__}: {(time() - start_time) * 1000:.2f} ms")
         return result

fabricatio/models/extra/aricle_rag.py CHANGED Viewed

@@ -1,13 +1,9 @@
 """A Module containing the article rag models."""
-from itertools import groupby
 import re
 from dataclasses import dataclass, field
-from more_itertools.more import first
-from more_itertools.recipes import flatten, unique
+from itertools import groupby
 from pathlib import Path
-from pydantic import Field
 from typing import ClassVar, Dict, List, Optional, Self, Unpack
 from fabricatio.fs import safe_text_read
@@ -17,6 +13,9 @@ from fabricatio.models.generic import AsPrompt
 from fabricatio.models.kwargs_types import ChunkKwargs
 from fabricatio.rust import BibManager, blake3_hash, split_into_chunks
 from fabricatio.utils import ok, wrapp_in_block
+from more_itertools.more import first
+from more_itertools.recipes import flatten, unique
+from pydantic import Field
 class ArticleChunk(MilvusDataBase):