fabricatio 0.2.7.dev5__cp312-cp312-manylinux_2_34_x86_64.whl → 0.2.8__cp312-cp312-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. fabricatio/__init__.py +4 -11
  2. fabricatio/actions/article.py +219 -92
  3. fabricatio/actions/article_rag.py +86 -21
  4. fabricatio/actions/output.py +71 -3
  5. fabricatio/actions/rag.py +3 -3
  6. fabricatio/actions/rules.py +39 -0
  7. fabricatio/capabilities/advanced_judge.py +23 -0
  8. fabricatio/capabilities/censor.py +90 -0
  9. fabricatio/capabilities/check.py +195 -0
  10. fabricatio/capabilities/correct.py +160 -96
  11. fabricatio/capabilities/propose.py +20 -4
  12. fabricatio/capabilities/rag.py +5 -4
  13. fabricatio/capabilities/rating.py +68 -23
  14. fabricatio/capabilities/review.py +21 -190
  15. fabricatio/capabilities/task.py +9 -10
  16. fabricatio/config.py +11 -3
  17. fabricatio/fs/curd.py +4 -0
  18. fabricatio/models/action.py +24 -10
  19. fabricatio/models/adv_kwargs_types.py +25 -0
  20. fabricatio/models/extra/__init__.py +1 -0
  21. fabricatio/models/extra/advanced_judge.py +32 -0
  22. fabricatio/models/extra/article_base.py +222 -86
  23. fabricatio/models/extra/article_essence.py +49 -176
  24. fabricatio/models/extra/article_main.py +35 -51
  25. fabricatio/models/extra/article_outline.py +10 -156
  26. fabricatio/models/extra/article_proposal.py +29 -13
  27. fabricatio/models/extra/patches.py +7 -0
  28. fabricatio/models/extra/problem.py +153 -0
  29. fabricatio/models/extra/rule.py +65 -0
  30. fabricatio/models/generic.py +311 -94
  31. fabricatio/models/kwargs_types.py +23 -17
  32. fabricatio/models/role.py +4 -1
  33. fabricatio/models/task.py +1 -1
  34. fabricatio/models/tool.py +149 -14
  35. fabricatio/models/usages.py +61 -47
  36. fabricatio/models/utils.py +0 -46
  37. fabricatio/parser.py +7 -8
  38. fabricatio/rust.cpython-312-x86_64-linux-gnu.so +0 -0
  39. fabricatio/{_rust.pyi → rust.pyi} +50 -0
  40. fabricatio/{_rust_instances.py → rust_instances.py} +1 -1
  41. fabricatio/utils.py +54 -0
  42. fabricatio-0.2.8.data/scripts/tdown +0 -0
  43. {fabricatio-0.2.7.dev5.dist-info → fabricatio-0.2.8.dist-info}/METADATA +2 -1
  44. fabricatio-0.2.8.dist-info/RECORD +58 -0
  45. fabricatio/_rust.cpython-312-x86_64-linux-gnu.so +0 -0
  46. fabricatio-0.2.7.dev5.data/scripts/tdown +0 -0
  47. fabricatio-0.2.7.dev5.dist-info/RECORD +0 -47
  48. {fabricatio-0.2.7.dev5.dist-info → fabricatio-0.2.8.dist-info}/WHEEL +0 -0
  49. {fabricatio-0.2.7.dev5.dist-info → fabricatio-0.2.8.dist-info}/licenses/LICENSE +0 -0
fabricatio/__init__.py CHANGED
@@ -1,10 +1,6 @@
1
1
  """Fabricatio is a Python library for building llm app using event-based agent structure."""
2
2
 
3
- from importlib.util import find_spec
4
-
5
- from fabricatio import actions, toolboxes, workflows
6
- from fabricatio._rust import BibManager
7
- from fabricatio._rust_instances import TEMPLATE_MANAGER
3
+ from fabricatio import actions, capabilities, toolboxes, workflows
8
4
  from fabricatio.core import env
9
5
  from fabricatio.journal import logger
10
6
  from fabricatio.models import extra
@@ -14,6 +10,8 @@ from fabricatio.models.role import Role
14
10
  from fabricatio.models.task import Task
15
11
  from fabricatio.models.tool import ToolBox
16
12
  from fabricatio.parser import Capture, GenericCapture, JsonCapture, PythonCapture
13
+ from fabricatio.rust import BibManager
14
+ from fabricatio.rust_instances import TEMPLATE_MANAGER
17
15
 
18
16
  __all__ = [
19
17
  "TEMPLATE_MANAGER",
@@ -29,15 +27,10 @@ __all__ = [
29
27
  "ToolBox",
30
28
  "WorkFlow",
31
29
  "actions",
30
+ "capabilities",
32
31
  "env",
33
32
  "extra",
34
33
  "logger",
35
34
  "toolboxes",
36
35
  "workflows",
37
36
  ]
38
-
39
-
40
- if find_spec("pymilvus"):
41
- from fabricatio.capabilities.rag import RAG
42
-
43
- __all__ += ["RAG"]
@@ -1,20 +1,28 @@
1
1
  """Actions for transmitting tasks to targets."""
2
2
 
3
+ from asyncio import gather
3
4
  from pathlib import Path
4
5
  from typing import Any, Callable, List, Optional
5
6
 
7
+ from fabricatio.capabilities.censor import Censor
8
+ from fabricatio.capabilities.correct import Correct
9
+ from fabricatio.capabilities.propose import Propose
6
10
  from fabricatio.fs import safe_text_read
7
11
  from fabricatio.journal import logger
8
12
  from fabricatio.models.action import Action
13
+ from fabricatio.models.extra.article_base import ArticleRefSequencePatch
9
14
  from fabricatio.models.extra.article_essence import ArticleEssence
10
15
  from fabricatio.models.extra.article_main import Article
11
16
  from fabricatio.models.extra.article_outline import ArticleOutline
12
17
  from fabricatio.models.extra.article_proposal import ArticleProposal
18
+ from fabricatio.models.extra.rule import RuleSet
13
19
  from fabricatio.models.task import Task
14
- from fabricatio.models.utils import ok
20
+ from fabricatio.rust import BibManager
21
+ from fabricatio.utils import ok
22
+ from more_itertools import filter_map
15
23
 
16
24
 
17
- class ExtractArticleEssence(Action):
25
+ class ExtractArticleEssence(Action, Propose):
18
26
  """Extract the essence of article(s) in text format from the paths specified in the task dependencies.
19
27
 
20
28
  Notes:
@@ -28,23 +36,65 @@ class ExtractArticleEssence(Action):
28
36
  async def _execute(
29
37
  self,
30
38
  task_input: Task,
31
- reader: Callable[[str], str] = lambda p: Path(p).read_text(encoding="utf-8"),
39
+ reader: Callable[[str], Optional[str]] = lambda p: Path(p).read_text(encoding="utf-8"),
32
40
  **_,
33
- ) -> Optional[List[ArticleEssence]]:
41
+ ) -> List[ArticleEssence]:
34
42
  if not task_input.dependencies:
35
43
  logger.info(err := "Task not approved, since no dependencies are provided.")
36
44
  raise RuntimeError(err)
37
-
45
+ logger.info(f"Extracting article essence from {len(task_input.dependencies)} files.")
38
46
  # trim the references
39
- contents = ["References".join(c.split("References")[:-1]) for c in map(reader, task_input.dependencies)]
40
- return await self.propose(
41
- ArticleEssence,
42
- contents,
43
- system_message=f"# your personal briefing: \n{self.briefing}",
44
- )
47
+ contents = list(filter_map(reader, task_input.dependencies))
48
+ logger.info(f"Read {len(task_input.dependencies)} to get {len(contents)} contents.")
45
49
 
50
+ out = []
51
+
52
+ for ess in await self.propose(
53
+ ArticleEssence,
54
+ [
55
+ f"{c}\n\n\nBased the provided academic article above, you need to extract the essence from it."
56
+ for c in contents
57
+ ],
58
+ ):
59
+ if ess is None:
60
+ logger.warning("Could not extract article essence")
61
+ else:
62
+ out.append(ess)
63
+ logger.info(f"Extracted {len(out)} article essence from {len(task_input.dependencies)} files.")
64
+ return out
65
+
66
+
67
+ class FixArticleEssence(Action):
68
+ """Fix the article essence based on the bibtex key."""
69
+
70
+ output_key: str = "fixed_article_essence"
71
+ """The key of the output data."""
46
72
 
47
- class GenerateArticleProposal(Action):
73
+ async def _execute(
74
+ self,
75
+ bib_mgr: BibManager,
76
+ article_essence: List[ArticleEssence],
77
+ **_,
78
+ ) -> List[ArticleEssence]:
79
+ out = []
80
+ count = 0
81
+ for a in article_essence:
82
+ if key := (bib_mgr.get_cite_key(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
83
+ a.title = bib_mgr.get_title_by_key(key) or a.title
84
+ a.authors = bib_mgr.get_author_by_key(key) or a.authors
85
+ a.publication_year = bib_mgr.get_year_by_key(key) or a.publication_year
86
+ a.bibtex_cite_key = key
87
+ logger.info(f"Updated {a.title} with {key}")
88
+ out.append(a)
89
+ else:
90
+ logger.warning(f"No key found for {a.title}")
91
+ count += 1
92
+ if count:
93
+ logger.warning(f"{count} articles have no key")
94
+ return out
95
+
96
+
97
+ class GenerateArticleProposal(Action, Propose):
48
98
  """Generate an outline for the article based on the extracted essence."""
49
99
 
50
100
  output_key: str = "article_proposal"
@@ -55,13 +105,14 @@ class GenerateArticleProposal(Action):
55
105
  task_input: Optional[Task] = None,
56
106
  article_briefing: Optional[str] = None,
57
107
  article_briefing_path: Optional[str] = None,
108
+ langauge: Optional[str] = None,
58
109
  **_,
59
110
  ) -> Optional[ArticleProposal]:
60
111
  if article_briefing is None and article_briefing_path is None and task_input is None:
61
112
  logger.error("Task not approved, since all inputs are None.")
62
113
  return None
63
114
 
64
- return ok(
115
+ proposal = ok(
65
116
  await self.propose(
66
117
  ArticleProposal,
67
118
  briefing := (
@@ -76,16 +127,19 @@ class GenerateArticleProposal(Action):
76
127
  )
77
128
  )
78
129
  ),
79
- **self.prepend_sys_msg(),
80
130
  ),
81
- "Could not generate the proposal."
131
+ "Could not generate the proposal.",
82
132
  ).update_ref(briefing)
133
+ if langauge:
134
+ proposal.language = langauge
83
135
 
136
+ return proposal
84
137
 
85
- class GenerateOutline(Action):
86
- """Generate the article based on the outline."""
87
138
 
88
- output_key: str = "article_outline"
139
+ class GenerateInitialOutline(Action, Propose):
140
+ """Generate the initial article outline based on the article proposal."""
141
+
142
+ output_key: str = "initial_article_outline"
89
143
  """The key of the output data."""
90
144
 
91
145
  async def _execute(
@@ -93,118 +147,191 @@ class GenerateOutline(Action):
93
147
  article_proposal: ArticleProposal,
94
148
  **_,
95
149
  ) -> Optional[ArticleOutline]:
96
- out = ok(
150
+ return ok(
97
151
  await self.propose(
98
152
  ArticleOutline,
99
153
  article_proposal.as_prompt(),
100
- **self.prepend_sys_msg(),
101
154
  ),
102
- "Could not generate the outline.",
103
- )
155
+ "Could not generate the initial outline.",
156
+ ).update_ref(article_proposal)
104
157
 
105
- manual = ok(await self.draft_rating_manual(
106
- topic=(
107
- topic
108
- := "Fix the internal referring error, make sure there is no more `ArticleRef` pointing to a non-existing article component."
109
- ),
110
- ),"Could not generate the rating manual.")
111
158
 
112
- while pack := out.find_illegal():
113
- component, err = ok(pack)
114
- logger.warning(f"Found error in the outline: \n{err}")
115
- corrected = ok(await self.correct_obj(
116
- component,
117
- reference=f"# Original Article Outline\n{out.display()}\n# Error Need to be fixed\n{err}",
118
- topic=topic,
119
- rating_manual=manual,
120
- supervisor_check=False,
121
- ),"Could not correct the component.")
122
- component.update_from(corrected)
123
- return out.update_ref(article_proposal)
159
+ class FixIntrospectedErrors(Action, Censor):
160
+ """Fix introspected errors in the article outline."""
124
161
 
162
+ output_key: str = "introspected_errors_fixed_outline"
163
+ """The key of the output data."""
125
164
 
126
- class CorrectProposal(Action):
127
- """Correct the proposal of the article."""
165
+ ruleset: Optional[RuleSet] = None
166
+ """The ruleset to use to fix the introspected errors."""
128
167
 
129
- output_key: str = "corrected_proposal"
168
+ async def _execute(
169
+ self,
170
+ article_outline: ArticleOutline,
171
+ ruleset: Optional[RuleSet] = None,
172
+ **_,
173
+ ) -> Optional[ArticleOutline]:
174
+ while pack := article_outline.find_introspected():
175
+ component, err = ok(pack)
176
+ logger.warning(f"Found introspected error: {err}")
177
+ corrected = ok(
178
+ await self.censor_obj(
179
+ component,
180
+ ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
181
+ reference=f"# Original Article Outline\n{article_outline.display()}\n# Some Basic errors found from `{component.title}` that need to be fixed\n{err}",
182
+ ),
183
+ "Could not correct the component.",
184
+ )
185
+ component.update_from(corrected)
130
186
 
131
- async def _execute(self, article_proposal: ArticleProposal, **_) -> Any:
132
- return (await self.censor_obj(article_proposal, reference=article_proposal.referenced)).update_ref(
133
- article_proposal
134
- )
187
+ return article_outline
135
188
 
136
189
 
137
- class CorrectOutline(Action):
138
- """Correct the outline of the article."""
190
+ class FixIllegalReferences(Action, Censor):
191
+ """Fix illegal references in the article outline."""
139
192
 
140
- output_key: str = "corrected_outline"
193
+ output_key: str = "illegal_references_fixed_outline"
141
194
  """The key of the output data."""
142
195
 
196
+ ruleset: Optional[RuleSet] = None
197
+ """Ruleset to use to fix the illegal references."""
198
+
143
199
  async def _execute(
144
200
  self,
145
201
  article_outline: ArticleOutline,
202
+ ruleset: Optional[RuleSet] = None,
146
203
  **_,
204
+ ) -> Optional[ArticleOutline]:
205
+ while pack := article_outline.find_illegal_ref(gather_identical=True):
206
+ refs, err = ok(pack)
207
+ logger.warning(f"Found illegal referring error: {err}")
208
+ corrected_ref = ok(
209
+ await self.censor_obj(
210
+ refs[0], # pyright: ignore [reportIndexIssue]
211
+ ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
212
+ reference=f"# Original Article Outline\n{article_outline.display()}\n# Some Basic errors found that need to be fixed\n{err}",
213
+ )
214
+ )
215
+ for ref in refs:
216
+ ref.update_from(corrected_ref) # pyright: ignore [reportAttributeAccessIssue]
217
+
218
+ return article_outline.update_ref(article_outline)
219
+
220
+
221
+ class TweakOutlineForwardRef(Action, Censor):
222
+ """Tweak the forward references in the article outline.
223
+
224
+ Ensures that the conclusions of the current chapter effectively support the analysis of subsequent chapters.
225
+ """
226
+
227
+ output_key: str = "article_outline_fw_ref_checked"
228
+ ruleset: Optional[RuleSet] = None
229
+ """Ruleset to use to fix the illegal references."""
230
+
231
+ async def _execute(
232
+ self, article_outline: ArticleOutline, ruleset: Optional[RuleSet] = None, **cxt
147
233
  ) -> ArticleOutline:
148
- return (await self.censor_obj(article_outline, reference=article_outline.referenced.as_prompt())).update_ref(
149
- article_outline
234
+ return await self._inner(
235
+ article_outline,
236
+ ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
237
+ field_name="support_to",
150
238
  )
151
239
 
240
+ async def _inner(self, article_outline: ArticleOutline, ruleset: RuleSet, field_name: str) -> ArticleOutline:
241
+ for a in article_outline.iter_dfs():
242
+ if judge := await self.evidently_judge(
243
+ f"{article_outline.as_prompt()}\n\n{a.display()}\n"
244
+ f"Does the `{a.__class__.__name__}`'s `{field_name}` field need to be extended or tweaked?"
245
+ ):
246
+ patch = ArticleRefSequencePatch.default()
247
+ patch.tweaked = getattr(a, field_name)
248
+
249
+ await self.censor_obj_inplace(
250
+ patch,
251
+ ruleset=ruleset,
252
+ reference=f"{article_outline.as_prompt()}\n"
253
+ f"The Article component titled `{a.title}` whose `{field_name}` field needs to be extended or tweaked.\n"
254
+ f"# Judgement\n{judge.display()}",
255
+ )
256
+ return article_outline
257
+
258
+
259
+ class TweakOutlineBackwardRef(TweakOutlineForwardRef):
260
+ """Tweak the backward references in the article outline.
261
+
262
+ Ensures that the prerequisites of the current chapter are correctly referenced in the `depend_on` field.
263
+ """
264
+
265
+ output_key: str = "article_outline_bw_ref_checked"
266
+ ruleset: Optional[RuleSet] = None
152
267
 
153
- class GenerateArticle(Action):
268
+ async def _execute(
269
+ self, article_outline: ArticleOutline, ruleset: Optional[RuleSet] = None, **cxt
270
+ ) -> ArticleOutline:
271
+ return await self._inner(
272
+ article_outline,
273
+ ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
274
+ field_name="depend_on",
275
+ )
276
+
277
+
278
+ class GenerateArticle(Action, Censor):
154
279
  """Generate the article based on the outline."""
155
280
 
156
281
  output_key: str = "article"
157
282
  """The key of the output data."""
283
+ ruleset: Optional[RuleSet] = None
158
284
 
159
285
  async def _execute(
160
286
  self,
161
287
  article_outline: ArticleOutline,
288
+ ruleset: Optional[RuleSet] = None,
162
289
  **_,
163
290
  ) -> Optional[Article]:
164
- article: Article = Article.from_outline(article_outline).update_ref(article_outline)
291
+ article: Article = Article.from_outline(ok(article_outline, "Article outline not specified.")).update_ref(
292
+ article_outline
293
+ )
165
294
 
166
- writing_manual = ok(await self.draft_rating_manual(
167
- topic=(
168
- topic_1
169
- := "improve the content of the subsection to fit the outline. SHALL never add or remove any section or subsection, you can only add or delete paragraphs within the subsection."
170
- ),
171
- ))
172
- err_resolve_manual = ok(await self.draft_rating_manual(
173
- topic=(topic_2 := "this article component has violated the constrain, please correct it.")
174
- ))
175
- for c, deps in article.iter_dfs_with_deps(chapter=False):
176
- logger.info(f"Updating the article component: \n{c.display()}")
177
-
178
- out = ok(
179
- await self.correct_obj(
180
- c,
181
- reference=(
182
- ref := f"{article_outline.referenced.as_prompt()}\n" + "\n".join(d.display() for d in deps)
183
- ),
184
- topic=topic_1,
185
- rating_manual=writing_manual,
186
- supervisor_check=False,
187
- ),
188
- "Could not correct the article component.",
189
- )
190
- while err := c.resolve_update_conflict(out):
191
- logger.warning(f"Found error in the article component: \n{err}")
192
- out = ok(
193
- await self.correct_obj(
194
- out,
195
- reference=f"{ref}\n\n# Violated Error\n{err}",
196
- topic=topic_2,
197
- rating_manual=err_resolve_manual,
198
- supervisor_check=False,
199
- ),
200
- "Could not correct the article component.",
295
+ await gather(
296
+ *[
297
+ self.censor_obj_inplace(
298
+ subsec,
299
+ ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
300
+ reference=f"# Original Article Outline\n{article_outline.display()}\n# Error Need to be fixed\n{err}",
201
301
  )
302
+ for _, __, subsec in article.iter_subsections()
303
+ if (err := subsec.introspect())
304
+ ],
305
+ return_exceptions=True,
306
+ )
202
307
 
203
- c.update_from(out)
204
308
  return article
205
309
 
206
310
 
207
- class CorrectArticle(Action):
311
+ class CorrectProposal(Action, Censor):
312
+ """Correct the proposal of the article."""
313
+
314
+ output_key: str = "corrected_proposal"
315
+
316
+ async def _execute(self, article_proposal: ArticleProposal, **_) -> Any:
317
+ raise NotImplementedError("Not implemented.")
318
+
319
+
320
+ class CorrectOutline(Action, Correct):
321
+ """Correct the outline of the article."""
322
+
323
+ output_key: str = "corrected_outline"
324
+ """The key of the output data."""
325
+
326
+ async def _execute(
327
+ self,
328
+ article_outline: ArticleOutline,
329
+ **_,
330
+ ) -> ArticleOutline:
331
+ raise NotImplementedError("Not implemented.")
332
+
333
+
334
+ class CorrectArticle(Action, Correct):
208
335
  """Correct the article based on the outline."""
209
336
 
210
337
  output_key: str = "corrected_article"
@@ -216,4 +343,4 @@ class CorrectArticle(Action):
216
343
  article_outline: ArticleOutline,
217
344
  **_,
218
345
  ) -> Article:
219
- return await self.censor_obj(article, reference=article_outline.referenced.as_prompt())
346
+ raise NotImplementedError("Not implemented.")
@@ -1,35 +1,100 @@
1
1
  """A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
2
2
 
3
+ from asyncio import gather
3
4
  from typing import Optional
4
5
 
6
+ from fabricatio.capabilities.censor import Censor
5
7
  from fabricatio.capabilities.rag import RAG
6
- from fabricatio.journal import logger
7
8
  from fabricatio.models.action import Action
8
- from fabricatio.models.extra.article_main import Article
9
- from fabricatio.models.extra.article_outline import ArticleOutline
9
+ from fabricatio.models.extra.article_main import Article, ArticleParagraphSequencePatch, ArticleSubsection
10
+ from fabricatio.models.extra.rule import RuleSet
11
+ from fabricatio.utils import ok
10
12
 
11
13
 
12
- class GenerateArticleRAG(Action, RAG):
13
- """Write an article based on the provided outline."""
14
+ class TweakArticleRAG(Action, RAG, Censor):
15
+ """Write an article based on the provided outline.
14
16
 
15
- output_key: str = "article"
17
+ This class inherits from `Action`, `RAG`, and `Censor` to provide capabilities for writing and refining articles
18
+ using Retrieval-Augmented Generation (RAG) techniques. It processes an article outline, enhances subsections by
19
+ searching for related references, and applies censoring rules to ensure compliance with the provided ruleset.
16
20
 
17
- async def _execute(self, article_outline: ArticleOutline, **cxt) -> Optional[Article]:
18
- """Write an article based on the provided outline."""
19
- logger.info(f"Writing an article based on the outline:\n{article_outline.title}")
20
- refined_q = await self.arefined_query(article_outline.display())
21
- return await self.propose(
22
- Article,
23
- article_outline.display(),
24
- **self.prepend_sys_msg(f"{await self.aretrieve_compact(refined_q)}\n{self.briefing}"),
25
- )
21
+ Attributes:
22
+ output_key (str): The key used to store the output of the action.
23
+ ruleset (Optional[RuleSet]): The ruleset to be used for censoring the article.
24
+ """
25
+
26
+ output_key: str = "rag_tweaked_article"
27
+ """The key used to store the output of the action."""
28
+
29
+ ruleset: Optional[RuleSet] = None
30
+ """The ruleset to be used for censoring the article."""
31
+
32
+ async def _execute(
33
+ self,
34
+ article: Article,
35
+ collection_name: str = "article_essence",
36
+ ruleset: Optional[RuleSet] = None,
37
+ parallel: bool = False,
38
+ **cxt,
39
+ ) -> Optional[Article]:
40
+ """Write an article based on the provided outline.
41
+
42
+ This method processes the article outline, either in parallel or sequentially, by enhancing each subsection
43
+ with relevant references and applying censoring rules.
26
44
 
45
+ Args:
46
+ article (Article): The article to be processed.
47
+ collection_name (str): The name of the collection to view for processing.
48
+ ruleset (Optional[RuleSet]): The ruleset to apply for censoring. If not provided, the class's ruleset is used.
49
+ parallel (bool): If True, process subsections in parallel. Otherwise, process them sequentially.
50
+ **cxt: Additional context parameters.
27
51
 
28
- class WriteArticleFineGrind(Action, RAG):
29
- """Fine-grind an article based on the provided outline."""
52
+ Returns:
53
+ Optional[Article]: The processed article with enhanced subsections and applied censoring rules.
54
+ """
55
+ self.view(collection_name)
30
56
 
31
- output_key: str = "article"
57
+ if parallel:
58
+ await gather(
59
+ *[
60
+ self._inner(article, subsec, ok(ruleset or self.ruleset, "No ruleset provided!"))
61
+ for _, __, subsec in article.iter_subsections()
62
+ ],
63
+ return_exceptions=True,
64
+ )
65
+ else:
66
+ for _, __, subsec in article.iter_subsections():
67
+ await self._inner(article, subsec, ok(ruleset or self.ruleset, "No ruleset provided!"))
68
+ return article
32
69
 
33
- async def _execute(self, article_outline: ArticleOutline, **cxt) -> Optional[Article]:
34
- """Fine-grind an article based on the provided outline."""
35
- logger.info(f"Fine-grinding an article based on the outline:\n{article_outline.title}")
70
+ async def _inner(self, article: Article, subsec: ArticleSubsection, ruleset: RuleSet) -> None:
71
+ """Enhance a subsection of the article with references and apply censoring rules.
72
+
73
+ This method refines the query for the subsection, retrieves related references, and applies censoring rules
74
+ to the subsection's paragraphs.
75
+
76
+ Args:
77
+ article (Article): The article containing the subsection.
78
+ subsec (ArticleSubsection): The subsection to be enhanced.
79
+ ruleset (RuleSet): The ruleset to apply for censoring.
80
+
81
+ Returns:
82
+ None
83
+ """
84
+ refind_q = ok(
85
+ await self.arefined_query(
86
+ f"{article.referenced.as_prompt()}\n"
87
+ f"# Subsection requiring reference enhancement\n"
88
+ f"{subsec.display()}\n"
89
+ f"# Requirement\n"
90
+ f"Search related articles in the base to find reference candidates, "
91
+ f"prioritizing both original article language and English usage",
92
+ )
93
+ )
94
+ patch = ArticleParagraphSequencePatch.default()
95
+ patch.tweaked = subsec.paragraphs
96
+ await self.censor_obj_inplace(
97
+ patch,
98
+ ruleset=ruleset,
99
+ reference=await self.aretrieve_compact(refind_q, final_limit=30),
100
+ )
@@ -1,18 +1,20 @@
1
1
  """Dump the finalized output to a file."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import Optional
4
+ from typing import Iterable, List, Optional, Type
5
5
 
6
+ from fabricatio.journal import logger
6
7
  from fabricatio.models.action import Action
7
- from fabricatio.models.generic import FinalizedDumpAble
8
+ from fabricatio.models.generic import FinalizedDumpAble, PersistentAble
8
9
  from fabricatio.models.task import Task
9
- from fabricatio.models.utils import ok
10
+ from fabricatio.utils import ok
10
11
 
11
12
 
12
13
  class DumpFinalizedOutput(Action):
13
14
  """Dump the finalized output to a file."""
14
15
 
15
16
  output_key: str = "dump_path"
17
+ dump_path: Optional[str] = None
16
18
 
17
19
  async def _execute(
18
20
  self,
@@ -23,6 +25,7 @@ class DumpFinalizedOutput(Action):
23
25
  ) -> str:
24
26
  dump_path = Path(
25
27
  dump_path
28
+ or self.dump_path
26
29
  or ok(
27
30
  await self.awhich_pathstr(
28
31
  f"{ok(task_input, 'Neither `task_input` and `dump_path` is provided.').briefing}\n\nExtract a single path of the file, to which I will dump the data."
@@ -32,3 +35,68 @@ class DumpFinalizedOutput(Action):
32
35
  )
33
36
  ok(to_dump, "Could not dump the data since the path is not specified.").finalized_dump_to(dump_path)
34
37
  return dump_path.as_posix()
38
+
39
+
40
+ class PersistentAll(Action):
41
+ """Persist all the data to a file."""
42
+
43
+ output_key: str = "persistent_count"
44
+ persist_dir: Optional[str] = None
45
+
46
+ async def _execute(
47
+ self,
48
+ task_input: Optional[Task] = None,
49
+ persist_dir: Optional[str | Path] = None,
50
+ **cxt,
51
+ ) -> int:
52
+ persist_dir = Path(
53
+ persist_dir
54
+ or self.persist_dir
55
+ or ok(
56
+ await self.awhich_pathstr(
57
+ f"{ok(task_input, 'Neither `task_input` and `dump_path` is provided.').briefing}\n\nExtract a single path of the file, to which I will persist the data."
58
+ ),
59
+ "Can not find the path of file to persist the data.",
60
+ )
61
+ )
62
+
63
+ count = 0
64
+ if persist_dir.is_file():
65
+ logger.warning("Dump should be a directory, but it is a file. Skip dumping.")
66
+ return count
67
+
68
+ for k, v in cxt.items():
69
+ final_dir = persist_dir.joinpath(k)
70
+ if isinstance(v, PersistentAble):
71
+ final_dir.mkdir(parents=True, exist_ok=True)
72
+ v.persist(final_dir)
73
+ count += 1
74
+ if isinstance(v, Iterable) and any(
75
+ persistent_ables := (pers for pers in v if isinstance(pers, PersistentAble))
76
+ ):
77
+ final_dir.mkdir(parents=True, exist_ok=True)
78
+ for per in persistent_ables:
79
+ per.persist(final_dir)
80
+ count += 1
81
+ logger.info(f"Persisted {count} objects to {persist_dir}")
82
+ return count
83
+
84
+
85
+ class RetrieveFromPersistent[T: PersistentAble](Action):
86
+ """Retrieve the object from the persistent file."""
87
+
88
+ output_key: str = "retrieved_obj"
89
+ """Retrieve the object from the persistent file."""
90
+ load_path: str
91
+ """The path of the persistent file or directory contains multiple file."""
92
+ retrieve_cls: Type[T]
93
+ """The class of the object to retrieve."""
94
+
95
+ async def _execute(self, /, **__) -> Optional[T | List[T]]:
96
+ logger.info(f"Retrieve `{self.retrieve_cls.__name__}` from persistent file: {self.load_path}")
97
+ if not (p := Path(self.load_path)).exists():
98
+ return None
99
+
100
+ if p.is_dir():
101
+ return [self.retrieve_cls.from_persistent(per) for per in p.glob("*")]
102
+ return self.retrieve_cls.from_persistent(self.load_path)