fabricatio 0.3.15.dev4__cp313-cp313-win_amd64.whl → 0.4.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. fabricatio/__init__.py +9 -8
  2. fabricatio/actions/output.py +21 -22
  3. fabricatio/actions/rules.py +83 -83
  4. fabricatio/rust.cp313-win_amd64.pyd +0 -0
  5. fabricatio/workflows/rag.py +2 -1
  6. fabricatio-0.4.0.data/scripts/tdown.exe +0 -0
  7. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.0.dist-info}/METADATA +18 -16
  8. fabricatio-0.4.0.dist-info/RECORD +18 -0
  9. fabricatio/actions/article.py +0 -415
  10. fabricatio/actions/article_rag.py +0 -407
  11. fabricatio/capabilities/__init__.py +0 -1
  12. fabricatio/capabilities/advanced_judge.py +0 -20
  13. fabricatio/capabilities/advanced_rag.py +0 -61
  14. fabricatio/capabilities/censor.py +0 -105
  15. fabricatio/capabilities/check.py +0 -212
  16. fabricatio/capabilities/correct.py +0 -228
  17. fabricatio/capabilities/extract.py +0 -74
  18. fabricatio/capabilities/persist.py +0 -103
  19. fabricatio/capabilities/propose.py +0 -65
  20. fabricatio/capabilities/rag.py +0 -264
  21. fabricatio/capabilities/rating.py +0 -404
  22. fabricatio/capabilities/review.py +0 -114
  23. fabricatio/capabilities/task.py +0 -113
  24. fabricatio/decorators.py +0 -253
  25. fabricatio/emitter.py +0 -177
  26. fabricatio/fs/__init__.py +0 -35
  27. fabricatio/fs/curd.py +0 -153
  28. fabricatio/fs/readers.py +0 -61
  29. fabricatio/journal.py +0 -12
  30. fabricatio/models/action.py +0 -263
  31. fabricatio/models/adv_kwargs_types.py +0 -63
  32. fabricatio/models/extra/__init__.py +0 -1
  33. fabricatio/models/extra/advanced_judge.py +0 -32
  34. fabricatio/models/extra/aricle_rag.py +0 -286
  35. fabricatio/models/extra/article_base.py +0 -486
  36. fabricatio/models/extra/article_essence.py +0 -101
  37. fabricatio/models/extra/article_main.py +0 -286
  38. fabricatio/models/extra/article_outline.py +0 -46
  39. fabricatio/models/extra/article_proposal.py +0 -52
  40. fabricatio/models/extra/patches.py +0 -20
  41. fabricatio/models/extra/problem.py +0 -165
  42. fabricatio/models/extra/rag.py +0 -98
  43. fabricatio/models/extra/rule.py +0 -52
  44. fabricatio/models/generic.py +0 -812
  45. fabricatio/models/kwargs_types.py +0 -121
  46. fabricatio/models/role.py +0 -99
  47. fabricatio/models/task.py +0 -310
  48. fabricatio/models/tool.py +0 -328
  49. fabricatio/models/usages.py +0 -791
  50. fabricatio/parser.py +0 -114
  51. fabricatio/rust.pyi +0 -846
  52. fabricatio/utils.py +0 -156
  53. fabricatio/workflows/articles.py +0 -24
  54. fabricatio-0.3.15.dev4.data/scripts/tdown.exe +0 -0
  55. fabricatio-0.3.15.dev4.data/scripts/ttm.exe +0 -0
  56. fabricatio-0.3.15.dev4.dist-info/RECORD +0 -64
  57. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.0.dist-info}/WHEEL +0 -0
  58. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,415 +0,0 @@
1
- """Actions for transmitting tasks to targets."""
2
-
3
- from asyncio import gather
4
- from pathlib import Path
5
- from typing import Callable, ClassVar, List, Optional
6
-
7
- from more_itertools import filter_map
8
- from pydantic import Field
9
- from rich import print as r_print
10
-
11
- from fabricatio.capabilities.censor import Censor
12
- from fabricatio.capabilities.extract import Extract
13
- from fabricatio.capabilities.propose import Propose
14
- from fabricatio.fs import dump_text, safe_text_read
15
- from fabricatio.journal import logger
16
- from fabricatio.models.action import Action
17
- from fabricatio.models.extra.article_essence import ArticleEssence
18
- from fabricatio.models.extra.article_main import Article, ArticleChapter, ArticleSubsection
19
- from fabricatio.models.extra.article_outline import ArticleOutline
20
- from fabricatio.models.extra.article_proposal import ArticleProposal
21
- from fabricatio.models.extra.rule import RuleSet
22
- from fabricatio.models.kwargs_types import ValidateKwargs
23
- from fabricatio.models.task import Task
24
- from fabricatio.models.usages import LLMUsage
25
- from fabricatio.rust import CONFIG, TEMPLATE_MANAGER, BibManager, detect_language, word_count
26
- from fabricatio.utils import ok, wrapp_in_block
27
-
28
-
29
class ExtractArticleEssence(Action, Propose):
    """Extract the essence of article(s) in text format from the paths specified in the task dependencies.

    Notes:
        This action is designed to extract vital information from articles with Markdown format, which is pure text, and
        which is converted from pdf files using `magic-pdf` from the `MinerU` project, see https://github.com/opendatalab/MinerU
    """

    output_key: str = "article_essence"
    """The key of the output data."""

    async def _execute(
        self,
        task_input: Task,
        reader: Callable[[str], Optional[str]] = lambda p: Path(p).read_text(encoding="utf-8"),
        **_,
    ) -> List[ArticleEssence]:
        """Read every dependency of the task and propose one `ArticleEssence` per readable text.

        Args:
            task_input: Task whose `dependencies` are passed to `reader` one by one.
            reader: Callable turning a dependency into article text. By default the
                dependency is read as a UTF-8 file. `None` results are dropped by
                `filter_map`.
            **_: Remaining context entries, ignored.

        Returns:
            The essences that were proposed successfully; failures are logged and omitted.

        Raises:
            RuntimeError: If the task carries no dependencies.
        """
        if not task_input.dependencies:
            logger.info(err := "Task not approved, since no dependencies are provided.")
            raise RuntimeError(err)
        logger.info(f"Extracting article essence from {len(task_input.dependencies)} files.")

        # Only the texts the reader actually produced take part in the extraction.
        contents = list(filter_map(reader, task_input.dependencies))
        logger.info(f"Read {len(task_input.dependencies)} to get {len(contents)} contents.")

        # One prompt per article; the answer language follows the language detected in that article.
        prompts = [
            f"{c}\n\n\nBased the provided academic article above, you need to extract the essence from it.\n\nWrite the value string using `{detect_language(c)}`"
            for c in contents
        ]
        proposed = await self.propose(ArticleEssence, prompts)

        extracted = []
        for essence in proposed:
            if essence is not None:
                extracted.append(essence)
                continue
            logger.warning("Could not extract article essence")

        logger.info(f"Extracted {len(extracted)} article essence from {len(task_input.dependencies)} files.")
        return extracted
69
-
70
-
71
class FixArticleEssence(Action):
    """Fix the article essence based on the bibtex key."""

    output_key: str = "fixed_article_essence"
    """The key of the output data."""

    async def _execute(
        self,
        bib_mgr: BibManager,
        article_essence: List[ArticleEssence],
        **_,
    ) -> List[ArticleEssence]:
        """Overwrite essence metadata with the bibliography entry matching its title.

        Args:
            bib_mgr: Bibliography manager queried for cite keys, titles, authors and years.
            article_essence: Essences to fix; matched items are mutated in place.
            **_: Remaining context entries, ignored.

        Returns:
            Only the essences for which a cite key was found (exact title lookup
            first, fuzzy lookup as fallback).
        """
        matched = []
        unmatched = 0

        for essence in article_essence:
            cite_key = bib_mgr.get_cite_key_by_title(essence.title) or bib_mgr.get_cite_key_fuzzy(essence.title)
            if not cite_key:
                logger.warning(f"No key found for {essence.title}")
                unmatched += 1
                continue

            # Prefer the bibliography's values; keep the extracted ones when a lookup yields nothing.
            essence.title = bib_mgr.get_title_by_key(cite_key) or essence.title
            essence.authors = bib_mgr.get_author_by_key(cite_key) or essence.authors
            essence.publication_year = bib_mgr.get_year_by_key(cite_key) or essence.publication_year
            essence.bibtex_cite_key = cite_key
            logger.info(f"Updated {essence.title} with {cite_key}")
            matched.append(essence)

        if unmatched:
            logger.warning(f"{unmatched} articles have no key")
        return matched
99
-
100
-
101
class GenerateArticleProposal(Action, Propose):
    """Generate an outline for the article based on the extracted essence."""

    # NOTE(review): the class docstring speaks of an outline, but `_execute` below
    # proposes an `ArticleProposal` from a briefing — confirm and reword upstream.

    output_key: str = "article_proposal"
    """The key of the output data."""

    async def _execute(
        self,
        task_input: Optional[Task] = None,
        article_briefing: Optional[str] = None,
        article_briefing_path: Optional[str] = None,
        **_,
    ) -> Optional[ArticleProposal]:
        """Propose an `ArticleProposal` from a briefing text.

        The briefing is resolved in this order: `article_briefing` itself, the file at
        `article_briefing_path`, and finally a file path extracted from
        `task_input.briefing` via `awhich_pathstr`.

        Args:
            task_input: Task whose briefing may mention the briefing file's path.
            article_briefing: The briefing text, if already available.
            article_briefing_path: Path of a file containing the briefing.
            **_: Remaining context entries, ignored.

        Returns:
            The generated proposal with the briefing set as its reference, or `None`
            when all three inputs are `None`.
        """
        if all(value is None for value in (article_briefing, article_briefing_path, task_input)):
            logger.error("Task not approved, since all inputs are None.")
            return None

        briefing = article_briefing
        if not briefing:
            briefing_path = article_briefing_path
            if not briefing_path:
                # Last resort: ask for the path mentioned in the task briefing.
                briefing_path = await self.awhich_pathstr(
                    f"{ok(task_input).briefing}\nExtract the path of file which contains the article briefing."
                )
            briefing = safe_text_read(ok(briefing_path, "Could not find the path of file to read."))

        logger.info("Start generating the proposal.")
        proposal = await self.propose(
            ArticleProposal,
            f"{briefing}\n\nWrite the value string using `{detect_language(briefing)}` as written language.",
        )
        return ok(proposal, "Could not generate the proposal.").update_ref(briefing)
136
-
137
-
138
class GenerateInitialOutline(Action, Extract):
    """Generate the initial article outline based on the article proposal."""

    output_key: str = "initial_article_outline"
    """The key of the output data."""

    supervisor: bool = False
    """Whether to use the supervisor to fix the outline."""

    extract_kwargs: ValidateKwargs[Optional[ArticleOutline]] = Field(default_factory=ValidateKwargs)
    """The kwargs to extract the outline."""

    async def _execute(
        self,
        article_proposal: ArticleProposal,
        supervisor: Optional[bool] = None,
        **_,
    ) -> Optional[ArticleOutline]:
        """Draft a raw outline from the proposal, optionally refine it interactively, then extract it.

        Args:
            article_proposal: Proposal whose prompt form and language drive the outline.
            supervisor: Per-call override of `self.supervisor`; `None` falls back to the
                attribute. When enabled, the raw outline is printed and the user is asked
                to accept it or describe an improvement until they accept.
            **_: Remaining context entries, ignored.

        Returns:
            The extracted `ArticleOutline`, with the proposal set as its reference.
        """
        # The requirement must be ONE string. A trailing comma after the last literal
        # would turn the parenthesised expression into a 1-tuple, and the tuple's repr
        # (parentheses, quotes, escaped newlines) would leak into the prompts below.
        # NOTE(review): "bing divided" in the prompt looks like a typo for "being divided";
        # left unchanged here because it is runtime prompt text.
        req = (
            f"Design each chapter of a proper and academic and ready for release manner.\n"
            f"You Must make sure every chapter have sections, and every section have subsections.\n"
            f"Make the chapter and sections and subsections bing divided into a specific enough article component.\n"
            f"Every chapter must have sections, every section must have subsections.\n"
            f"Note that you SHALL use `{article_proposal.language}` as written language"
        )

        raw_outline = await self.aask(f"{(article_proposal.as_prompt())}\n{req}")

        if supervisor or (supervisor is None and self.supervisor):
            # Imported lazily so the interactive dependency is only needed in supervised mode.
            from questionary import confirm, text

            r_print(raw_outline)
            while not await confirm("Accept this version and continue?", default=True).ask_async():
                imp = await text("Enter the improvement:").ask_async()
                raw_outline = await self.aask(
                    f"{article_proposal.as_prompt()}\n{wrapp_in_block(raw_outline, 'Previous ArticleOutline')}\n{req}\n{wrapp_in_block(imp, title='Improvement')}"
                )
                r_print(raw_outline)

        return ok(
            await self.extract(ArticleOutline, raw_outline, **self.extract_kwargs),
            "Could not generate the initial outline.",
        ).update_ref(article_proposal)
181
-
182
-
183
class ExtractOutlineFromRaw(Action, Extract):
    """Extract the outline from the raw outline."""

    output_key: str = "article_outline_from_raw"

    async def _execute(self, article_outline_raw_path: str | Path, **cxt) -> ArticleOutline:
        """Read the raw outline file and extract an `ArticleOutline` from its text.

        Args:
            article_outline_raw_path: Location of the file holding the raw outline.
            **cxt: Remaining context entries, unused.

        Returns:
            The extracted outline.
        """
        posix_path = Path(article_outline_raw_path).as_posix()
        logger.info(f"Extracting outline from raw: {posix_path}")

        raw_text = safe_text_read(article_outline_raw_path)
        outline = await self.extract(ArticleOutline, raw_text)
        return ok(outline, "Could not extract the outline from raw.")
195
-
196
-
197
class FixIntrospectedErrors(Action, Censor):
    """Fix introspected errors in the article outline."""

    output_key: str = "introspected_errors_fixed_outline"
    """The key of the output data."""

    ruleset: Optional[RuleSet] = None
    """The ruleset to use to fix the introspected errors."""
    max_error_count: Optional[int] = None
    """The maximum number of errors to fix."""

    async def _execute(
        self,
        article_outline: ArticleOutline,
        intro_fix_ruleset: Optional[RuleSet] = None,
        **_,
    ) -> Optional[ArticleOutline]:
        """Repeatedly censor the outline until introspection reports nothing or the limit is hit.

        Args:
            article_outline: Outline to repair.
            intro_fix_ruleset: Ruleset for the correction; falls back to `self.ruleset`.
            **_: Remaining context entries, ignored.

        Returns:
            The outline after the last correction round (the input itself when no error
            was reported). Each corrected outline references the original input.

        Notes:
            A falsy `max_error_count` (`None` or `0`) means "no limit". Otherwise at most
            `max_error_count` correction rounds run. The limit used to be tested with `>`
            after the round and before the increment, which allowed two rounds too many.
        """
        counter = 0
        origin = article_outline
        while pack := article_outline.gather_introspected():
            # Check the budget BEFORE spending another round, and only warn when errors remain.
            if self.max_error_count and counter >= self.max_error_count:
                logger.warning("Max error count reached, stopping.")
                break

            logger.info(f"Found {counter}th introspected errors")
            logger.warning(f"Found introspected error: {pack}")
            article_outline = ok(
                await self.censor_obj(
                    article_outline,
                    # Resolved inside the loop so a missing ruleset only fails when a fix is needed.
                    ruleset=ok(intro_fix_ruleset or self.ruleset, "No ruleset provided"),
                    reference=f"{article_outline.display()}\n # Fatal Error of the Original Article Outline\n{pack}",
                ),
                "Could not correct the component.",
            ).update_ref(origin)
            counter += 1

        return article_outline
234
-
235
-
236
class GenerateArticle(Action, Censor):
    """Generate the article based on the outline."""

    output_key: str = "article"
    """The key of the output data."""
    ruleset: Optional[RuleSet] = None

    async def _execute(
        self,
        article_outline: ArticleOutline,
        article_gen_ruleset: Optional[RuleSet] = None,
        **_,
    ) -> Optional[Article]:
        """Build an article from the outline and censor, in place, every subsection that fails introspection.

        Args:
            article_outline: Outline the article is created from and referenced to.
            article_gen_ruleset: Ruleset for the correction; falls back to `self.ruleset`.
            **_: Remaining context entries, ignored.

        Returns:
            The article after all pending subsection corrections have completed.
        """
        article: Article = Article.from_outline(ok(article_outline, "Article outline not specified.")).update_ref(
            article_outline
        )

        # Collect one correction coroutine per faulty subsection, then run them concurrently.
        corrections = []
        for _, _, subsec in article.iter_subsections():
            err = subsec.introspect()
            if not err:
                continue
            logger.warning(f"Found Introspection Error:\n{err}")
            corrections.append(
                self.censor_obj_inplace(
                    subsec,
                    ruleset=ok(article_gen_ruleset or self.ruleset, "No ruleset provided"),
                    reference=f"{article_outline.as_prompt()}\n# Error Need to be fixed\n{err}\nYou should use `{subsec.language}` to write the new `Subsection`.",
                )
            )

        await gather(*corrections)

        return article
266
-
267
-
268
class LoadArticle(Action):
    """Load the article from the outline and typst code."""

    output_key: str = "loaded_article"

    async def _execute(self, article_outline: ArticleOutline, typst_code: str, **cxt) -> Article:
        """Combine the outline with the given typst source into an `Article`.

        Args:
            article_outline: Outline of the article.
            typst_code: Typst source passed to `Article.from_mixed_source` together with the outline.
            **cxt: Remaining context entries, unused.

        Returns:
            The article built by `Article.from_mixed_source`.
        """
        loaded = Article.from_mixed_source(article_outline, typst_code)
        return loaded
275
-
276
-
277
class WriteChapterSummary(Action, LLMUsage):
    """Write the chapter summary."""

    ctx_override: ClassVar[bool] = True

    paragraph_count: int = 1
    """The number of paragraphs to generate in the chapter summary."""

    summary_word_count: int = 120
    """The number of words to use in each chapter summary."""
    output_key: str = "summarized_article"
    """The key under which the summarized article will be stored in the output."""
    summary_title: str = "Chapter Summary"
    """The title to be used for the generated chapter summary section."""

    skip_chapters: List[str] = Field(default_factory=list)
    """A list of chapter titles to skip during summary generation."""

    async def _execute(self, article_path: Path, **cxt) -> Article:
        """Generate a summary for each eligible chapter and write the article file back.

        A chapter is eligible when its title is not in `skip_chapters` and it has at
        least one section besides a previously generated summary section.

        Args:
            article_path: Path of the article file; it is read, updated and rewritten.
            **cxt: Remaining context entries, unused.

        Returns:
            The article with the new summaries attached.

        Raises:
            ValueError: If chapters were left after applying `skip_chapters` but none
                of them is eligible.
        """
        article = Article.from_article_file(article_path, article_path.stem)

        candidates = [c for c in article.chapters if c.title not in self.skip_chapters]

        chaps = []
        for chapter_candidate in candidates:
            if not chapter_candidate.sections:
                logger.warning(
                    f"Chapter '{chapter_candidate.title}' has no sections and will be skipped for summary generation."
                )
            elif len(chapter_candidate.sections) == 1 and chapter_candidate.sections[-1].title == self.summary_title:
                # After the old summary is removed below, no section would be left to
                # attach the new summary to (this used to raise IndexError after the LLM calls).
                logger.warning(
                    f"Chapter '{chapter_candidate.title}' only contains a previous `{self.summary_title}` section "
                    f"and will be skipped for summary generation."
                )
            else:
                chaps.append(chapter_candidate)

        # Fail loudly when filtering on section content removed every remaining chapter.
        if candidates and not chaps:
            raise ValueError("No chapters with sections were found. Please check your input data.")

        # Log the titles of the chapters that are actually summarized.
        logger.info(";".join(a.title for a in chaps))
        ret = [
            ArticleSubsection.from_typst_code(self.summary_title, raw)
            for raw in (
                await self.aask(
                    TEMPLATE_MANAGER.render_template(
                        CONFIG.templates.chap_summary_template,
                        [
                            {
                                "chapter": c.to_typst_code(),
                                "title": c.title,
                                "language": c.language,
                                "summary_word_count": self.summary_word_count,
                                "paragraph_count": self.paragraph_count,
                            }
                            for c in chaps
                        ],
                    )
                )
            )
        ]

        for c, n in zip(chaps, ret, strict=True):
            c: ArticleChapter
            n: ArticleSubsection
            if c.sections[-1].title == self.summary_title:
                logger.debug(f"Removing old summary `{self.summary_title}` at {c.title}")
                c.sections.pop()

            # The filter above guarantees at least one section is left at this point.
            c.sections[-1].subsections.append(n)

        article.update_article_file(article_path)

        # The summary was appended as a subsection; rewrite its `===` heading to `==` in the
        # saved file. Presumably this promotes it to section level — confirm against the
        # typst rendering of `ArticleSubsection`.
        dump_text(
            article_path, safe_text_read(article_path).replace(f"=== {self.summary_title}", f"== {self.summary_title}")
        )
        return article
361
-
362
-
363
class WriteResearchContentSummary(Action, LLMUsage):
    """Write the research content summary."""

    ctx_override: ClassVar[bool] = True
    summary_word_count: int = 160
    """The number of words to use in the research content summary."""

    output_key: str = "summarized_article"
    """The key under which the summarized article will be stored in the output."""

    summary_title: str = "Research Content"
    """The title to be used for the generated research content summary section."""

    paragraph_count: int = 1
    """The number of paragraphs to generate in the research content summary."""

    async def _execute(self, article_path: Path, **cxt) -> Article:
        """Generate the research content summary and attach it to the article's first chapter.

        Args:
            article_path: Path of the article file; it is read, updated and rewritten.
            **cxt: Remaining context entries, unused.

        Returns:
            The article with the new summary attached.

        Raises:
            ValueError: If the article has no chapters, the first chapter has no
                sections, or its only section is a previously generated summary.
        """
        article = Article.from_article_file(article_path, article_path.stem)
        if not article.chapters:
            raise ValueError("No chapters found in the article.")
        chap_1 = article.chapters[0]
        if not chap_1.sections:
            raise ValueError("No sections found in the first chapter of the article.")

        has_old_summary = chap_1.sections[-1].title == self.summary_title
        if has_old_summary and len(chap_1.sections) == 1:
            # Removing the old summary would leave no section to attach the new one to.
            # Fail before the LLM call instead of with an IndexError after it.
            raise ValueError(
                f"The first chapter of the article only contains a previous `{self.summary_title}` section."
            )

        outline = article.extrac_outline()
        suma: str = await self.aask(
            TEMPLATE_MANAGER.render_template(
                CONFIG.templates.research_content_summary_template,
                {
                    "title": outline.title,
                    "outline": outline.to_typst_code(),
                    # NOTE(review): the language is detected from the summary title, not from
                    # the article content — confirm this is intended.
                    "language": detect_language(self.summary_title),
                    "summary_word_count": self.summary_word_count,
                    "paragraph_count": self.paragraph_count,
                },
            )
        )
        logger.success(
            f"{self.summary_title}|Wordcount: {word_count(suma)}|Expected: {self.summary_word_count}\n{suma}"
        )

        if has_old_summary:
            # remove old
            logger.debug(f"Removing old summary `{self.summary_title}`")
            chap_1.sections.pop()

        chap_1.sections[-1].subsections.append(ArticleSubsection.from_typst_code(self.summary_title, suma))

        article.update_article_file(article_path)
        # The summary was appended as a subsection; rewrite its `===` heading to `==` in the
        # saved file. Presumably this promotes it to section level — confirm against the
        # typst rendering of `ArticleSubsection`.
        dump_text(
            article_path, safe_text_read(article_path).replace(f"=== {self.summary_title}", f"== {self.summary_title}")
        )
        return article