fabricatio 0.3.14.dev7__cp312-cp312-manylinux_2_34_x86_64.whl → 0.3.15.dev5__cp312-cp312-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
 
3
3
  from asyncio import gather
4
4
  from pathlib import Path
5
- from typing import Callable, List, Optional
5
+ from typing import Callable, ClassVar, List, Optional
6
6
 
7
7
  from more_itertools import filter_map
8
8
  from pydantic import Field
@@ -15,14 +15,14 @@ from fabricatio.fs import dump_text, safe_text_read
15
15
  from fabricatio.journal import logger
16
16
  from fabricatio.models.action import Action
17
17
  from fabricatio.models.extra.article_essence import ArticleEssence
18
- from fabricatio.models.extra.article_main import Article
18
+ from fabricatio.models.extra.article_main import Article, ArticleChapter, ArticleSubsection
19
19
  from fabricatio.models.extra.article_outline import ArticleOutline
20
20
  from fabricatio.models.extra.article_proposal import ArticleProposal
21
21
  from fabricatio.models.extra.rule import RuleSet
22
22
  from fabricatio.models.kwargs_types import ValidateKwargs
23
23
  from fabricatio.models.task import Task
24
24
  from fabricatio.models.usages import LLMUsage
25
- from fabricatio.rust import CONFIG, TEMPLATE_MANAGER, BibManager, detect_language
25
+ from fabricatio.rust import CONFIG, TEMPLATE_MANAGER, BibManager, detect_language, word_count
26
26
  from fabricatio.utils import ok, wrapp_in_block
27
27
 
28
28
 
@@ -277,43 +277,139 @@ class LoadArticle(Action):
277
277
  class WriteChapterSummary(Action, LLMUsage):
278
278
  """Write the chapter summary."""
279
279
 
280
- output_key: str = "chapter_summaries"
280
+ ctx_override: ClassVar[bool] = True
281
281
 
282
282
  paragraph_count: int = 1
283
+ """The number of paragraphs to generate in the chapter summary."""
283
284
 
284
- summary_word_count: int = 200
285
-
285
+ summary_word_count: int = 120
286
+ """The number of words to use in each chapter summary."""
287
+ output_key: str = "summarized_article"
288
+ """The key under which the summarized article will be stored in the output."""
286
289
  summary_title: str = "Chapter Summary"
287
- write_to: Optional[Path] = None
290
+ """The title to be used for the generated chapter summary section."""
291
+
292
+ skip_chapters: List[str] = Field(default_factory=list)
293
+ """A list of chapter titles to skip during summary generation."""
294
+
295
+ async def _execute(self, article_path: Path, **cxt) -> Article:
296
+ article = Article.from_article_file(article_path, article_path.stem)
297
+
298
+ chaps = [c for c in article.chapters if c.title not in self.skip_chapters]
299
+
300
+ retained_chapters = []
301
+ # Count chapters before filtering based on section presence,
302
+ # chaps at this point has already been filtered by self.skip_chapters
303
+ initial_chaps_for_summary_step_count = len(chaps)
304
+
305
+ for chapter_candidate in chaps:
306
+ if chapter_candidate.sections: # Check if the sections list is non-empty
307
+ retained_chapters.append(chapter_candidate)
308
+ else:
309
+ # Log a warning for each chapter skipped due to lack of sections
310
+ logger.warning(
311
+ f"Chapter '{chapter_candidate.title}' has no sections and will be skipped for summary generation."
312
+ )
313
+
314
+ chaps = retained_chapters # Update chaps to only include chapters with sections
288
315
 
289
- async def _execute(self, article: Article, write_to: Optional[Path] = None, **cxt) -> List[str]:
290
- logger.info(";".join(a.title for a in article.chapters))
316
+ # If chaps is now empty, but there were chapters to consider at the start of this step,
317
+ # raise an error instead of silently producing no summaries.
318
+ if not chaps and initial_chaps_for_summary_step_count > 0:
319
+ raise ValueError("No chapters with sections were found. Please check your input data.")
291
320
 
321
+ # This line was part of the original selection.
322
+ # It will now log the titles of the chapters that are actually being processed (those with sections).
323
+ # If 'chaps' is empty, this will result in logger.info(""), which is acceptable.
324
+ logger.info(";".join(a.title for a in chaps))
292
325
  ret = [
293
- f"== {self.summary_title}\n{raw}"
326
+ ArticleSubsection.from_typst_code(self.summary_title, raw)
294
327
  for raw in (
295
328
  await self.aask(
296
329
  TEMPLATE_MANAGER.render_template(
297
330
  CONFIG.templates.chap_summary_template,
298
331
  [
299
332
  {
300
- "chapter": a.to_typst_code(),
301
- "title": a.title,
302
- "language": a.language,
333
+ "chapter": c.to_typst_code(),
334
+ "title": c.title,
335
+ "language": c.language,
303
336
  "summary_word_count": self.summary_word_count,
304
337
  "paragraph_count": self.paragraph_count,
305
338
  }
306
- for a in article.chapters
339
+ for c in chaps
307
340
  ],
308
341
  )
309
342
  )
310
343
  )
311
344
  ]
312
345
 
313
- if (to := (self.write_to or write_to)) is not None:
314
- dump_text(
315
- to,
316
- "\n\n\n".join(f"//{a.title}\n\n{s}" for a, s in zip(article.chapters, ret, strict=True)),
346
+ for c, n in zip(chaps, ret, strict=True):
347
+ c: ArticleChapter
348
+ n: ArticleSubsection
349
+ if c.sections[-1].title == self.summary_title:
350
+ logger.debug(f"Removing old summary `{self.summary_title}` at {c.title}")
351
+ c.sections.pop()
352
+
353
+ c.sections[-1].subsections.append(n)
354
+
355
+ article.update_article_file(article_path)
356
+
357
+ dump_text(
358
+ article_path, safe_text_read(article_path).replace(f"=== {self.summary_title}", f"== {self.summary_title}")
359
+ )
360
+ return article
361
+
362
+
363
+ class WriteResearchContentSummary(Action, LLMUsage):
364
+ """Write the research content summary."""
365
+
366
+ ctx_override: ClassVar[bool] = True
367
+ summary_word_count: int = 160
368
+ """The number of words to use in the research content summary."""
369
+
370
+ output_key: str = "summarized_article"
371
+ """The key under which the summarized article will be stored in the output."""
372
+
373
+ summary_title: str = "Research Content"
374
+ """The title to be used for the generated research content summary section."""
375
+
376
+ paragraph_count: int = 1
377
+ """The number of paragraphs to generate in the research content summary."""
378
+
379
+ async def _execute(self, article_path: Path, **cxt) -> Article:
380
+ article = Article.from_article_file(article_path, article_path.stem)
381
+ if not article.chapters:
382
+ raise ValueError("No chapters found in the article.")
383
+ chap_1 = article.chapters[0]
384
+ if not chap_1.sections:
385
+ raise ValueError("No sections found in the first chapter of the article.")
386
+
387
+ outline = article.extrac_outline()
388
+ suma: str = await self.aask(
389
+ TEMPLATE_MANAGER.render_template(
390
+ CONFIG.templates.research_content_summary_template,
391
+ {
392
+ "title": outline.title,
393
+ "outline": outline.to_typst_code(),
394
+ "language": detect_language(self.summary_title),
395
+ "summary_word_count": self.summary_word_count,
396
+ "paragraph_count": self.paragraph_count,
397
+ },
317
398
  )
399
+ )
400
+ logger.success(
401
+ f"{self.summary_title}|Wordcount: {word_count(suma)}|Expected: {self.summary_word_count}\n{suma}"
402
+ )
403
+
404
+ if chap_1.sections[-1].title == self.summary_title:
405
+ # remove old
406
+ logger.debug(f"Removing old summary `{self.summary_title}`")
407
+ chap_1.sections.pop()
318
408
 
319
- return ret
409
+ chap_1.sections[-1].subsections.append(ArticleSubsection.from_typst_code(self.summary_title, suma))
410
+
411
+ article.update_article_file(article_path)
412
+ dump_text(
413
+ article_path, safe_text_read(article_path).replace(f"=== {self.summary_title}", f"== {self.summary_title}")
414
+ )
415
+ return article
@@ -1,11 +1,11 @@
1
1
  """A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
2
2
 
3
3
  from asyncio import gather
4
-
5
4
  from pathlib import Path
6
- from pydantic import Field, PositiveInt
7
5
  from typing import ClassVar, List, Optional
8
6
 
7
+ from pydantic import Field, PositiveInt
8
+
9
9
  from fabricatio.capabilities.advanced_rag import AdvancedRAG
10
10
  from fabricatio.capabilities.censor import Censor
11
11
  from fabricatio.capabilities.extract import Extract
@@ -75,11 +75,11 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
75
75
  tei_endpoint: Optional[str] = None
76
76
 
77
77
  async def _execute(
78
- self,
79
- article_outline: ArticleOutline,
80
- collection_name: Optional[str] = None,
81
- supervisor: Optional[bool] = None,
82
- **cxt,
78
+ self,
79
+ article_outline: ArticleOutline,
80
+ collection_name: Optional[str] = None,
81
+ supervisor: Optional[bool] = None,
82
+ **cxt,
83
83
  ) -> Article:
84
84
  article = Article.from_outline(article_outline).update_ref(article_outline)
85
85
  self.target_collection = collection_name or self.safe_target_collection
@@ -100,12 +100,12 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
100
100
  "questionary", "`questionary` is required for supervisor mode, please install it by `fabricatio[qa]`"
101
101
  )
102
102
  async def _supervisor_inner(
103
- self,
104
- article: Article,
105
- article_outline: ArticleOutline,
106
- chap: ArticleChapter,
107
- sec: ArticleSection,
108
- subsec: ArticleSubsection,
103
+ self,
104
+ article: Article,
105
+ article_outline: ArticleOutline,
106
+ chap: ArticleChapter,
107
+ sec: ArticleSection,
108
+ subsec: ArticleSubsection,
109
109
  ) -> ArticleSubsection:
110
110
  from questionary import confirm, text
111
111
  from rich import print as r_print
@@ -133,12 +133,12 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
133
133
  return await self.extract_new_subsec(subsec, raw_paras, cm)
134
134
 
135
135
  async def _inner(
136
- self,
137
- article: Article,
138
- article_outline: ArticleOutline,
139
- chap: ArticleChapter,
140
- sec: ArticleSection,
141
- subsec: ArticleSubsection,
136
+ self,
137
+ article: Article,
138
+ article_outline: ArticleOutline,
139
+ chap: ArticleChapter,
140
+ sec: ArticleSection,
141
+ subsec: ArticleSubsection,
142
142
  ) -> ArticleSubsection:
143
143
  cm = CitationManager()
144
144
 
@@ -154,7 +154,7 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
154
154
  return await self.extract_new_subsec(subsec, raw_paras, cm)
155
155
 
156
156
  async def extract_new_subsec(
157
- self, subsec: ArticleSubsection, raw_paras: str, cm: CitationManager
157
+ self, subsec: ArticleSubsection, raw_paras: str, cm: CitationManager
158
158
  ) -> ArticleSubsection:
159
159
  """Extract the new subsec."""
160
160
  new_subsec = ok(
@@ -177,14 +177,14 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
177
177
  return subsec
178
178
 
179
179
  async def write_raw(
180
- self,
181
- article: Article,
182
- article_outline: ArticleOutline,
183
- chap: ArticleChapter,
184
- sec: ArticleSection,
185
- subsec: ArticleSubsection,
186
- cm: CitationManager,
187
- extra_instruction: str = "",
180
+ self,
181
+ article: Article,
182
+ article_outline: ArticleOutline,
183
+ chap: ArticleChapter,
184
+ sec: ArticleSection,
185
+ subsec: ArticleSubsection,
186
+ cm: CitationManager,
187
+ extra_instruction: str = "",
188
188
  ) -> str:
189
189
  """Write the raw paragraphs of the subsec."""
190
190
  return await self.aask(
@@ -200,14 +200,14 @@ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
200
200
  )
201
201
 
202
202
  async def search_database(
203
- self,
204
- article: Article,
205
- article_outline: ArticleOutline,
206
- chap: ArticleChapter,
207
- sec: ArticleSection,
208
- subsec: ArticleSubsection,
209
- cm: CitationManager,
210
- extra_instruction: str = "",
203
+ self,
204
+ article: Article,
205
+ article_outline: ArticleOutline,
206
+ chap: ArticleChapter,
207
+ sec: ArticleSection,
208
+ subsec: ArticleSubsection,
209
+ cm: CitationManager,
210
+ extra_instruction: str = "",
211
211
  ) -> None:
212
212
  """Search database for related references."""
213
213
  search_req = (
@@ -312,12 +312,12 @@ class TweakArticleRAG(Action, RAG, Censor):
312
312
  """The limit of references to be retrieved"""
313
313
 
314
314
  async def _execute(
315
- self,
316
- article: Article,
317
- collection_name: str = "article_essence",
318
- twk_rag_ruleset: Optional[RuleSet] = None,
319
- parallel: bool = False,
320
- **cxt,
315
+ self,
316
+ article: Article,
317
+ collection_name: str = "article_essence",
318
+ twk_rag_ruleset: Optional[RuleSet] = None,
319
+ parallel: bool = False,
320
+ **cxt,
321
321
  ) -> Article:
322
322
  """Write an article based on the provided outline.
323
323
 
@@ -372,10 +372,10 @@ class TweakArticleRAG(Action, RAG, Censor):
372
372
  subsec,
373
373
  ruleset=ruleset,
374
374
  reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, max_accepted=self.ref_limit))}\n\n"
375
- f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
376
- f"You should Always use `{subsec.language}` as written language, "
377
- f"which is the original language of the `{subsec.title}`. "
378
- f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
375
+ f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
376
+ f"You should Always use `{subsec.language}` as written language, "
377
+ f"which is the original language of the `{subsec.title}`. "
378
+ f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
379
379
  )
380
380
 
381
381
 
@@ -390,12 +390,12 @@ class ChunkArticle(Action):
390
390
  """The maximum overlapping rate between chunks."""
391
391
 
392
392
  async def _execute(
393
- self,
394
- article_path: str | Path,
395
- bib_manager: BibManager,
396
- max_chunk_size: Optional[int] = None,
397
- max_overlapping_rate: Optional[float] = None,
398
- **_,
393
+ self,
394
+ article_path: str | Path,
395
+ bib_manager: BibManager,
396
+ max_chunk_size: Optional[int] = None,
397
+ max_overlapping_rate: Optional[float] = None,
398
+ **_,
399
399
  ) -> List[ArticleChunk]:
400
400
  return ArticleChunk.from_file(
401
401
  article_path,
@@ -3,11 +3,10 @@
3
3
  from pathlib import Path
4
4
  from typing import Any, Iterable, List, Mapping, Optional, Self, Sequence, Type
5
5
 
6
- from fabricatio.capabilities.persist import PersistentAble
7
6
  from fabricatio.fs import dump_text
8
7
  from fabricatio.journal import logger
9
8
  from fabricatio.models.action import Action
10
- from fabricatio.models.generic import FinalizedDumpAble, FromMapping, FromSequence
9
+ from fabricatio.models.generic import FinalizedDumpAble, FromMapping, FromSequence, PersistentAble
11
10
  from fabricatio.models.task import Task
12
11
  from fabricatio.models.usages import LLMUsage
13
12
  from fabricatio.rust import TEMPLATE_MANAGER
@@ -21,11 +20,11 @@ class DumpFinalizedOutput(Action, LLMUsage):
21
20
  dump_path: Optional[str] = None
22
21
 
23
22
  async def _execute(
24
- self,
25
- to_dump: FinalizedDumpAble,
26
- task_input: Optional[Task] = None,
27
- dump_path: Optional[str | Path] = None,
28
- **_,
23
+ self,
24
+ to_dump: FinalizedDumpAble,
25
+ task_input: Optional[Task] = None,
26
+ dump_path: Optional[str | Path] = None,
27
+ **_,
29
28
  ) -> str:
30
29
  dump_path = Path(
31
30
  dump_path
@@ -52,11 +51,11 @@ class RenderedDump(Action, LLMUsage):
52
51
  """The template name to render the data."""
53
52
 
54
53
  async def _execute(
55
- self,
56
- to_dump: FinalizedDumpAble,
57
- task_input: Optional[Task] = None,
58
- dump_path: Optional[str | Path] = None,
59
- **_,
54
+ self,
55
+ to_dump: FinalizedDumpAble,
56
+ task_input: Optional[Task] = None,
57
+ dump_path: Optional[str | Path] = None,
58
+ **_,
60
59
  ) -> str:
61
60
  dump_path = Path(
62
61
  dump_path
@@ -91,10 +90,10 @@ class PersistentAll(Action, LLMUsage):
91
90
  """Whether to remove the existing dir before dumping."""
92
91
 
93
92
  async def _execute(
94
- self,
95
- task_input: Optional[Task] = None,
96
- persist_dir: Optional[str | Path] = None,
97
- **cxt,
93
+ self,
94
+ task_input: Optional[Task] = None,
95
+ persist_dir: Optional[str | Path] = None,
96
+ **cxt,
98
97
  ) -> int:
99
98
  persist_dir = Path(
100
99
  persist_dir
@@ -124,7 +123,7 @@ class PersistentAll(Action, LLMUsage):
124
123
  v.persist(final_dir)
125
124
  count += 1
126
125
  if isinstance(v, Iterable) and any(
127
- persistent_ables := (pers for pers in v if isinstance(pers, PersistentAble))
126
+ persistent_ables := (pers for pers in v if isinstance(pers, PersistentAble))
128
127
  ):
129
128
  logger.info(f"Persisting collection {k} to {final_dir}")
130
129
  final_dir.mkdir(parents=True, exist_ok=True)
@@ -174,11 +173,11 @@ class RetrieveFromLatest[T: PersistentAble](RetrieveFromPersistent[T], FromMappi
174
173
 
175
174
  @classmethod
176
175
  def from_mapping(
177
- cls,
178
- mapping: Mapping[str, str | Path],
179
- *,
180
- retrieve_cls: Type[T],
181
- **kwargs,
176
+ cls,
177
+ mapping: Mapping[str, str | Path],
178
+ *,
179
+ retrieve_cls: Type[T],
180
+ **kwargs,
182
181
  ) -> List["RetrieveFromLatest[T]"]:
183
182
  """Create a list of `RetrieveFromLatest` from the mapping."""
184
183
  return [
fabricatio/decorators.py CHANGED
@@ -235,6 +235,7 @@ def logging_exec_time[**P, R](
235
235
  @wraps(func)
236
236
  async def _async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
237
237
  start_time = time()
238
+ logger.debug(f"Starting execution of {func.__name__}")
238
239
  result = await func(*args, **kwargs)
239
240
  logger.debug(f"Execution time of `{func.__name__}`: {time() - start_time:.2f} s")
240
241
  return result
@@ -244,6 +245,7 @@ def logging_exec_time[**P, R](
244
245
  @wraps(func)
245
246
  def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
246
247
  start_time = time()
248
+ logger.debug(f"Starting execution of {func.__name__}")
247
249
  result = func(*args, **kwargs)
248
250
  logger.debug(f"Execution time of {func.__name__}: {(time() - start_time) * 1000:.2f} ms")
249
251
  return result
@@ -1,13 +1,9 @@
1
1
  """A Module containing the article rag models."""
2
2
 
3
- from itertools import groupby
4
-
5
3
  import re
6
4
  from dataclasses import dataclass, field
7
- from more_itertools.more import first
8
- from more_itertools.recipes import flatten, unique
5
+ from itertools import groupby
9
6
  from pathlib import Path
10
- from pydantic import Field
11
7
  from typing import ClassVar, Dict, List, Optional, Self, Unpack
12
8
 
13
9
  from fabricatio.fs import safe_text_read
@@ -17,6 +13,9 @@ from fabricatio.models.generic import AsPrompt
17
13
  from fabricatio.models.kwargs_types import ChunkKwargs
18
14
  from fabricatio.rust import BibManager, blake3_hash, split_into_chunks
19
15
  from fabricatio.utils import ok, wrapp_in_block
16
+ from more_itertools.more import first
17
+ from more_itertools.recipes import flatten, unique
18
+ from pydantic import Field
20
19
 
21
20
 
22
21
  class ArticleChunk(MilvusDataBase):