fabricatio 0.2.7.dev4__cp312-cp312-manylinux_2_34_x86_64.whl → 0.2.8__cp312-cp312-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/__init__.py +4 -11
- fabricatio/actions/article.py +226 -92
- fabricatio/actions/article_rag.py +86 -21
- fabricatio/actions/output.py +71 -3
- fabricatio/actions/rag.py +3 -3
- fabricatio/actions/rules.py +39 -0
- fabricatio/capabilities/advanced_judge.py +23 -0
- fabricatio/capabilities/censor.py +90 -0
- fabricatio/capabilities/check.py +195 -0
- fabricatio/capabilities/correct.py +160 -96
- fabricatio/capabilities/propose.py +20 -4
- fabricatio/capabilities/rag.py +5 -4
- fabricatio/capabilities/rating.py +68 -23
- fabricatio/capabilities/review.py +21 -190
- fabricatio/capabilities/task.py +9 -10
- fabricatio/config.py +11 -3
- fabricatio/fs/curd.py +4 -0
- fabricatio/models/action.py +24 -10
- fabricatio/models/adv_kwargs_types.py +25 -0
- fabricatio/models/extra/__init__.py +1 -0
- fabricatio/models/extra/advanced_judge.py +32 -0
- fabricatio/models/extra/article_base.py +324 -89
- fabricatio/models/extra/article_essence.py +49 -176
- fabricatio/models/extra/article_main.py +48 -127
- fabricatio/models/extra/article_outline.py +12 -152
- fabricatio/models/extra/article_proposal.py +29 -13
- fabricatio/models/extra/patches.py +7 -0
- fabricatio/models/extra/problem.py +153 -0
- fabricatio/models/extra/rule.py +65 -0
- fabricatio/models/generic.py +360 -88
- fabricatio/models/kwargs_types.py +23 -17
- fabricatio/models/role.py +4 -1
- fabricatio/models/task.py +1 -1
- fabricatio/models/tool.py +149 -14
- fabricatio/models/usages.py +61 -47
- fabricatio/models/utils.py +0 -46
- fabricatio/parser.py +7 -8
- fabricatio/rust.cpython-312-x86_64-linux-gnu.so +0 -0
- fabricatio/{_rust.pyi → rust.pyi} +50 -0
- fabricatio/{_rust_instances.py → rust_instances.py} +1 -1
- fabricatio/utils.py +54 -0
- fabricatio-0.2.8.data/scripts/tdown +0 -0
- {fabricatio-0.2.7.dev4.dist-info → fabricatio-0.2.8.dist-info}/METADATA +2 -1
- fabricatio-0.2.8.dist-info/RECORD +58 -0
- fabricatio/_rust.cpython-312-x86_64-linux-gnu.so +0 -0
- fabricatio-0.2.7.dev4.data/scripts/tdown +0 -0
- fabricatio-0.2.7.dev4.dist-info/RECORD +0 -47
- {fabricatio-0.2.7.dev4.dist-info → fabricatio-0.2.8.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.7.dev4.dist-info → fabricatio-0.2.8.dist-info}/licenses/LICENSE +0 -0
fabricatio/__init__.py
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
"""Fabricatio is a Python library for building llm app using event-based agent structure."""
|
2
2
|
|
3
|
-
from
|
4
|
-
|
5
|
-
from fabricatio import actions, toolboxes, workflows
|
6
|
-
from fabricatio._rust import BibManager
|
7
|
-
from fabricatio._rust_instances import TEMPLATE_MANAGER
|
3
|
+
from fabricatio import actions, capabilities, toolboxes, workflows
|
8
4
|
from fabricatio.core import env
|
9
5
|
from fabricatio.journal import logger
|
10
6
|
from fabricatio.models import extra
|
@@ -14,6 +10,8 @@ from fabricatio.models.role import Role
|
|
14
10
|
from fabricatio.models.task import Task
|
15
11
|
from fabricatio.models.tool import ToolBox
|
16
12
|
from fabricatio.parser import Capture, GenericCapture, JsonCapture, PythonCapture
|
13
|
+
from fabricatio.rust import BibManager
|
14
|
+
from fabricatio.rust_instances import TEMPLATE_MANAGER
|
17
15
|
|
18
16
|
__all__ = [
|
19
17
|
"TEMPLATE_MANAGER",
|
@@ -29,15 +27,10 @@ __all__ = [
|
|
29
27
|
"ToolBox",
|
30
28
|
"WorkFlow",
|
31
29
|
"actions",
|
30
|
+
"capabilities",
|
32
31
|
"env",
|
33
32
|
"extra",
|
34
33
|
"logger",
|
35
34
|
"toolboxes",
|
36
35
|
"workflows",
|
37
36
|
]
|
38
|
-
|
39
|
-
|
40
|
-
if find_spec("pymilvus"):
|
41
|
-
from fabricatio.capabilities.rag import RAG
|
42
|
-
|
43
|
-
__all__ += ["RAG"]
|
fabricatio/actions/article.py
CHANGED
@@ -1,20 +1,28 @@
|
|
1
1
|
"""Actions for transmitting tasks to targets."""
|
2
2
|
|
3
|
+
from asyncio import gather
|
3
4
|
from pathlib import Path
|
4
5
|
from typing import Any, Callable, List, Optional
|
5
6
|
|
7
|
+
from fabricatio.capabilities.censor import Censor
|
8
|
+
from fabricatio.capabilities.correct import Correct
|
9
|
+
from fabricatio.capabilities.propose import Propose
|
6
10
|
from fabricatio.fs import safe_text_read
|
7
11
|
from fabricatio.journal import logger
|
8
12
|
from fabricatio.models.action import Action
|
13
|
+
from fabricatio.models.extra.article_base import ArticleRefSequencePatch
|
9
14
|
from fabricatio.models.extra.article_essence import ArticleEssence
|
10
15
|
from fabricatio.models.extra.article_main import Article
|
11
16
|
from fabricatio.models.extra.article_outline import ArticleOutline
|
12
17
|
from fabricatio.models.extra.article_proposal import ArticleProposal
|
18
|
+
from fabricatio.models.extra.rule import RuleSet
|
13
19
|
from fabricatio.models.task import Task
|
14
|
-
from fabricatio.
|
20
|
+
from fabricatio.rust import BibManager
|
21
|
+
from fabricatio.utils import ok
|
22
|
+
from more_itertools import filter_map
|
15
23
|
|
16
24
|
|
17
|
-
class ExtractArticleEssence(Action):
|
25
|
+
class ExtractArticleEssence(Action, Propose):
|
18
26
|
"""Extract the essence of article(s) in text format from the paths specified in the task dependencies.
|
19
27
|
|
20
28
|
Notes:
|
@@ -28,23 +36,65 @@ class ExtractArticleEssence(Action):
|
|
28
36
|
async def _execute(
|
29
37
|
self,
|
30
38
|
task_input: Task,
|
31
|
-
reader: Callable[[str], str] = lambda p: Path(p).read_text(encoding="utf-8"),
|
39
|
+
reader: Callable[[str], Optional[str]] = lambda p: Path(p).read_text(encoding="utf-8"),
|
32
40
|
**_,
|
33
|
-
) ->
|
41
|
+
) -> List[ArticleEssence]:
|
34
42
|
if not task_input.dependencies:
|
35
43
|
logger.info(err := "Task not approved, since no dependencies are provided.")
|
36
44
|
raise RuntimeError(err)
|
37
|
-
|
45
|
+
logger.info(f"Extracting article essence from {len(task_input.dependencies)} files.")
|
38
46
|
# trim the references
|
39
|
-
contents =
|
40
|
-
|
41
|
-
ArticleEssence,
|
42
|
-
contents,
|
43
|
-
system_message=f"# your personal briefing: \n{self.briefing}",
|
44
|
-
)
|
47
|
+
contents = list(filter_map(reader, task_input.dependencies))
|
48
|
+
logger.info(f"Read {len(task_input.dependencies)} to get {len(contents)} contents.")
|
45
49
|
|
50
|
+
out = []
|
51
|
+
|
52
|
+
for ess in await self.propose(
|
53
|
+
ArticleEssence,
|
54
|
+
[
|
55
|
+
f"{c}\n\n\nBased the provided academic article above, you need to extract the essence from it."
|
56
|
+
for c in contents
|
57
|
+
],
|
58
|
+
):
|
59
|
+
if ess is None:
|
60
|
+
logger.warning("Could not extract article essence")
|
61
|
+
else:
|
62
|
+
out.append(ess)
|
63
|
+
logger.info(f"Extracted {len(out)} article essence from {len(task_input.dependencies)} files.")
|
64
|
+
return out
|
65
|
+
|
66
|
+
|
67
|
+
class FixArticleEssence(Action):
|
68
|
+
"""Fix the article essence based on the bibtex key."""
|
69
|
+
|
70
|
+
output_key: str = "fixed_article_essence"
|
71
|
+
"""The key of the output data."""
|
46
72
|
|
47
|
-
|
73
|
+
async def _execute(
|
74
|
+
self,
|
75
|
+
bib_mgr: BibManager,
|
76
|
+
article_essence: List[ArticleEssence],
|
77
|
+
**_,
|
78
|
+
) -> List[ArticleEssence]:
|
79
|
+
out = []
|
80
|
+
count = 0
|
81
|
+
for a in article_essence:
|
82
|
+
if key := (bib_mgr.get_cite_key(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
|
83
|
+
a.title = bib_mgr.get_title_by_key(key) or a.title
|
84
|
+
a.authors = bib_mgr.get_author_by_key(key) or a.authors
|
85
|
+
a.publication_year = bib_mgr.get_year_by_key(key) or a.publication_year
|
86
|
+
a.bibtex_cite_key = key
|
87
|
+
logger.info(f"Updated {a.title} with {key}")
|
88
|
+
out.append(a)
|
89
|
+
else:
|
90
|
+
logger.warning(f"No key found for {a.title}")
|
91
|
+
count += 1
|
92
|
+
if count:
|
93
|
+
logger.warning(f"{count} articles have no key")
|
94
|
+
return out
|
95
|
+
|
96
|
+
|
97
|
+
class GenerateArticleProposal(Action, Propose):
|
48
98
|
"""Generate an outline for the article based on the extracted essence."""
|
49
99
|
|
50
100
|
output_key: str = "article_proposal"
|
@@ -55,13 +105,14 @@ class GenerateArticleProposal(Action):
|
|
55
105
|
task_input: Optional[Task] = None,
|
56
106
|
article_briefing: Optional[str] = None,
|
57
107
|
article_briefing_path: Optional[str] = None,
|
108
|
+
langauge: Optional[str] = None,
|
58
109
|
**_,
|
59
110
|
) -> Optional[ArticleProposal]:
|
60
111
|
if article_briefing is None and article_briefing_path is None and task_input is None:
|
61
112
|
logger.error("Task not approved, since all inputs are None.")
|
62
113
|
return None
|
63
114
|
|
64
|
-
|
115
|
+
proposal = ok(
|
65
116
|
await self.propose(
|
66
117
|
ArticleProposal,
|
67
118
|
briefing := (
|
@@ -70,21 +121,25 @@ class GenerateArticleProposal(Action):
|
|
70
121
|
ok(
|
71
122
|
article_briefing_path
|
72
123
|
or await self.awhich_pathstr(
|
73
|
-
f"{task_input.briefing}\nExtract the path of file which contains the article briefing."
|
124
|
+
f"{ok(task_input).briefing}\nExtract the path of file which contains the article briefing."
|
74
125
|
),
|
75
126
|
"Could not find the path of file to read.",
|
76
127
|
)
|
77
128
|
)
|
78
129
|
),
|
79
|
-
|
80
|
-
|
130
|
+
),
|
131
|
+
"Could not generate the proposal.",
|
81
132
|
).update_ref(briefing)
|
133
|
+
if langauge:
|
134
|
+
proposal.language = langauge
|
82
135
|
|
136
|
+
return proposal
|
83
137
|
|
84
|
-
class GenerateOutline(Action):
|
85
|
-
"""Generate the article based on the outline."""
|
86
138
|
|
87
|
-
|
139
|
+
class GenerateInitialOutline(Action, Propose):
|
140
|
+
"""Generate the initial article outline based on the article proposal."""
|
141
|
+
|
142
|
+
output_key: str = "initial_article_outline"
|
88
143
|
"""The key of the output data."""
|
89
144
|
|
90
145
|
async def _execute(
|
@@ -92,112 +147,191 @@ class GenerateOutline(Action):
|
|
92
147
|
article_proposal: ArticleProposal,
|
93
148
|
**_,
|
94
149
|
) -> Optional[ArticleOutline]:
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
)
|
100
|
-
|
101
|
-
manual = await self.draft_rating_manual(
|
102
|
-
topic=(
|
103
|
-
topic
|
104
|
-
:= "Fix the internal referring error, make sure there is no more `ArticleRef` pointing to a non-existing article component."
|
150
|
+
return ok(
|
151
|
+
await self.propose(
|
152
|
+
ArticleOutline,
|
153
|
+
article_proposal.as_prompt(),
|
105
154
|
),
|
106
|
-
|
107
|
-
|
108
|
-
logger.warning(f"Found error in the outline: \n{err}")
|
109
|
-
out = await self.correct_obj(
|
110
|
-
out,
|
111
|
-
reference=f"# Referring Error\n{err}",
|
112
|
-
topic=topic,
|
113
|
-
rating_manual=manual,
|
114
|
-
supervisor_check=False,
|
115
|
-
)
|
116
|
-
return out.update_ref(article_proposal)
|
155
|
+
"Could not generate the initial outline.",
|
156
|
+
).update_ref(article_proposal)
|
117
157
|
|
118
158
|
|
119
|
-
class
|
120
|
-
"""
|
159
|
+
class FixIntrospectedErrors(Action, Censor):
|
160
|
+
"""Fix introspected errors in the article outline."""
|
121
161
|
|
122
|
-
output_key: str = "
|
162
|
+
output_key: str = "introspected_errors_fixed_outline"
|
163
|
+
"""The key of the output data."""
|
123
164
|
|
124
|
-
|
125
|
-
|
126
|
-
article_proposal
|
127
|
-
)
|
165
|
+
ruleset: Optional[RuleSet] = None
|
166
|
+
"""The ruleset to use to fix the introspected errors."""
|
128
167
|
|
168
|
+
async def _execute(
|
169
|
+
self,
|
170
|
+
article_outline: ArticleOutline,
|
171
|
+
ruleset: Optional[RuleSet] = None,
|
172
|
+
**_,
|
173
|
+
) -> Optional[ArticleOutline]:
|
174
|
+
while pack := article_outline.find_introspected():
|
175
|
+
component, err = ok(pack)
|
176
|
+
logger.warning(f"Found introspected error: {err}")
|
177
|
+
corrected = ok(
|
178
|
+
await self.censor_obj(
|
179
|
+
component,
|
180
|
+
ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
|
181
|
+
reference=f"# Original Article Outline\n{article_outline.display()}\n# Some Basic errors found from `{component.title}` that need to be fixed\n{err}",
|
182
|
+
),
|
183
|
+
"Could not correct the component.",
|
184
|
+
)
|
185
|
+
component.update_from(corrected)
|
129
186
|
|
130
|
-
|
131
|
-
"""Correct the outline of the article."""
|
187
|
+
return article_outline
|
132
188
|
|
133
|
-
|
189
|
+
|
190
|
+
class FixIllegalReferences(Action, Censor):
|
191
|
+
"""Fix illegal references in the article outline."""
|
192
|
+
|
193
|
+
output_key: str = "illegal_references_fixed_outline"
|
134
194
|
"""The key of the output data."""
|
135
195
|
|
196
|
+
ruleset: Optional[RuleSet] = None
|
197
|
+
"""Ruleset to use to fix the illegal references."""
|
198
|
+
|
136
199
|
async def _execute(
|
137
200
|
self,
|
138
201
|
article_outline: ArticleOutline,
|
202
|
+
ruleset: Optional[RuleSet] = None,
|
139
203
|
**_,
|
204
|
+
) -> Optional[ArticleOutline]:
|
205
|
+
while pack := article_outline.find_illegal_ref(gather_identical=True):
|
206
|
+
refs, err = ok(pack)
|
207
|
+
logger.warning(f"Found illegal referring error: {err}")
|
208
|
+
corrected_ref = ok(
|
209
|
+
await self.censor_obj(
|
210
|
+
refs[0], # pyright: ignore [reportIndexIssue]
|
211
|
+
ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
|
212
|
+
reference=f"# Original Article Outline\n{article_outline.display()}\n# Some Basic errors found that need to be fixed\n{err}",
|
213
|
+
)
|
214
|
+
)
|
215
|
+
for ref in refs:
|
216
|
+
ref.update_from(corrected_ref) # pyright: ignore [reportAttributeAccessIssue]
|
217
|
+
|
218
|
+
return article_outline.update_ref(article_outline)
|
219
|
+
|
220
|
+
|
221
|
+
class TweakOutlineForwardRef(Action, Censor):
|
222
|
+
"""Tweak the forward references in the article outline.
|
223
|
+
|
224
|
+
Ensures that the conclusions of the current chapter effectively support the analysis of subsequent chapters.
|
225
|
+
"""
|
226
|
+
|
227
|
+
output_key: str = "article_outline_fw_ref_checked"
|
228
|
+
ruleset: Optional[RuleSet] = None
|
229
|
+
"""Ruleset to use to fix the illegal references."""
|
230
|
+
|
231
|
+
async def _execute(
|
232
|
+
self, article_outline: ArticleOutline, ruleset: Optional[RuleSet] = None, **cxt
|
140
233
|
) -> ArticleOutline:
|
141
|
-
return
|
142
|
-
article_outline
|
234
|
+
return await self._inner(
|
235
|
+
article_outline,
|
236
|
+
ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
|
237
|
+
field_name="support_to",
|
143
238
|
)
|
144
239
|
|
240
|
+
async def _inner(self, article_outline: ArticleOutline, ruleset: RuleSet, field_name: str) -> ArticleOutline:
|
241
|
+
for a in article_outline.iter_dfs():
|
242
|
+
if judge := await self.evidently_judge(
|
243
|
+
f"{article_outline.as_prompt()}\n\n{a.display()}\n"
|
244
|
+
f"Does the `{a.__class__.__name__}`'s `{field_name}` field need to be extended or tweaked?"
|
245
|
+
):
|
246
|
+
patch = ArticleRefSequencePatch.default()
|
247
|
+
patch.tweaked = getattr(a, field_name)
|
248
|
+
|
249
|
+
await self.censor_obj_inplace(
|
250
|
+
patch,
|
251
|
+
ruleset=ruleset,
|
252
|
+
reference=f"{article_outline.as_prompt()}\n"
|
253
|
+
f"The Article component titled `{a.title}` whose `{field_name}` field needs to be extended or tweaked.\n"
|
254
|
+
f"# Judgement\n{judge.display()}",
|
255
|
+
)
|
256
|
+
return article_outline
|
257
|
+
|
145
258
|
|
146
|
-
class
|
259
|
+
class TweakOutlineBackwardRef(TweakOutlineForwardRef):
|
260
|
+
"""Tweak the backward references in the article outline.
|
261
|
+
|
262
|
+
Ensures that the prerequisites of the current chapter are correctly referenced in the `depend_on` field.
|
263
|
+
"""
|
264
|
+
|
265
|
+
output_key: str = "article_outline_bw_ref_checked"
|
266
|
+
ruleset: Optional[RuleSet] = None
|
267
|
+
|
268
|
+
async def _execute(
|
269
|
+
self, article_outline: ArticleOutline, ruleset: Optional[RuleSet] = None, **cxt
|
270
|
+
) -> ArticleOutline:
|
271
|
+
return await self._inner(
|
272
|
+
article_outline,
|
273
|
+
ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
|
274
|
+
field_name="depend_on",
|
275
|
+
)
|
276
|
+
|
277
|
+
|
278
|
+
class GenerateArticle(Action, Censor):
|
147
279
|
"""Generate the article based on the outline."""
|
148
280
|
|
149
281
|
output_key: str = "article"
|
150
282
|
"""The key of the output data."""
|
283
|
+
ruleset: Optional[RuleSet] = None
|
151
284
|
|
152
285
|
async def _execute(
|
153
286
|
self,
|
154
287
|
article_outline: ArticleOutline,
|
288
|
+
ruleset: Optional[RuleSet] = None,
|
155
289
|
**_,
|
156
290
|
) -> Optional[Article]:
|
157
|
-
article: Article = Article.from_outline(article_outline).update_ref(
|
158
|
-
|
159
|
-
writing_manual = await self.draft_rating_manual(
|
160
|
-
topic=(
|
161
|
-
topic_1
|
162
|
-
:= "improve the content of the subsection to fit the outline. SHALL never add or remove any section or subsection, you can only add or delete paragraphs within the subsection."
|
163
|
-
),
|
164
|
-
)
|
165
|
-
err_resolve_manual = await self.draft_rating_manual(
|
166
|
-
topic=(topic_2 := "this article component has violated the constrain, please correct it.")
|
291
|
+
article: Article = Article.from_outline(ok(article_outline, "Article outline not specified.")).update_ref(
|
292
|
+
article_outline
|
167
293
|
)
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
reference=(
|
175
|
-
ref := f"{article_outline.referenced.as_prompt()}\n" + "\n".join(d.display() for d in deps)
|
176
|
-
),
|
177
|
-
topic=topic_1,
|
178
|
-
rating_manual=writing_manual,
|
179
|
-
supervisor_check=False,
|
180
|
-
),
|
181
|
-
"Could not correct the article component.",
|
182
|
-
)
|
183
|
-
while err := c.resolve_update_error(out):
|
184
|
-
logger.warning(f"Found error in the article component: \n{err}")
|
185
|
-
out = ok(
|
186
|
-
await self.correct_obj(
|
187
|
-
out,
|
188
|
-
reference=f"{ref}\n\n# Violated Error\n{err}",
|
189
|
-
topic=topic_2,
|
190
|
-
rating_manual=err_resolve_manual,
|
191
|
-
supervisor_check=False,
|
192
|
-
),
|
193
|
-
"Could not correct the article component.",
|
294
|
+
|
295
|
+
await gather(
|
296
|
+
*[
|
297
|
+
self.censor_obj_inplace(
|
298
|
+
subsec,
|
299
|
+
ruleset=ok(ruleset or self.ruleset, "No ruleset provided"),
|
300
|
+
reference=f"# Original Article Outline\n{article_outline.display()}\n# Error Need to be fixed\n{err}",
|
194
301
|
)
|
302
|
+
for _, __, subsec in article.iter_subsections()
|
303
|
+
if (err := subsec.introspect())
|
304
|
+
],
|
305
|
+
return_exceptions=True,
|
306
|
+
)
|
195
307
|
|
196
|
-
c.update_from(out)
|
197
308
|
return article
|
198
309
|
|
199
310
|
|
200
|
-
class
|
311
|
+
class CorrectProposal(Action, Censor):
|
312
|
+
"""Correct the proposal of the article."""
|
313
|
+
|
314
|
+
output_key: str = "corrected_proposal"
|
315
|
+
|
316
|
+
async def _execute(self, article_proposal: ArticleProposal, **_) -> Any:
|
317
|
+
raise NotImplementedError("Not implemented.")
|
318
|
+
|
319
|
+
|
320
|
+
class CorrectOutline(Action, Correct):
|
321
|
+
"""Correct the outline of the article."""
|
322
|
+
|
323
|
+
output_key: str = "corrected_outline"
|
324
|
+
"""The key of the output data."""
|
325
|
+
|
326
|
+
async def _execute(
|
327
|
+
self,
|
328
|
+
article_outline: ArticleOutline,
|
329
|
+
**_,
|
330
|
+
) -> ArticleOutline:
|
331
|
+
raise NotImplementedError("Not implemented.")
|
332
|
+
|
333
|
+
|
334
|
+
class CorrectArticle(Action, Correct):
|
201
335
|
"""Correct the article based on the outline."""
|
202
336
|
|
203
337
|
output_key: str = "corrected_article"
|
@@ -209,4 +343,4 @@ class CorrectArticle(Action):
|
|
209
343
|
article_outline: ArticleOutline,
|
210
344
|
**_,
|
211
345
|
) -> Article:
|
212
|
-
|
346
|
+
raise NotImplementedError("Not implemented.")
|
@@ -1,35 +1,100 @@
|
|
1
1
|
"""A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
|
2
2
|
|
3
|
+
from asyncio import gather
|
3
4
|
from typing import Optional
|
4
5
|
|
6
|
+
from fabricatio.capabilities.censor import Censor
|
5
7
|
from fabricatio.capabilities.rag import RAG
|
6
|
-
from fabricatio.journal import logger
|
7
8
|
from fabricatio.models.action import Action
|
8
|
-
from fabricatio.models.extra.article_main import Article
|
9
|
-
from fabricatio.models.extra.
|
9
|
+
from fabricatio.models.extra.article_main import Article, ArticleParagraphSequencePatch, ArticleSubsection
|
10
|
+
from fabricatio.models.extra.rule import RuleSet
|
11
|
+
from fabricatio.utils import ok
|
10
12
|
|
11
13
|
|
12
|
-
class
|
13
|
-
"""Write an article based on the provided outline.
|
14
|
+
class TweakArticleRAG(Action, RAG, Censor):
|
15
|
+
"""Write an article based on the provided outline.
|
14
16
|
|
15
|
-
|
17
|
+
This class inherits from `Action`, `RAG`, and `Censor` to provide capabilities for writing and refining articles
|
18
|
+
using Retrieval-Augmented Generation (RAG) techniques. It processes an article outline, enhances subsections by
|
19
|
+
searching for related references, and applies censoring rules to ensure compliance with the provided ruleset.
|
16
20
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
21
|
+
Attributes:
|
22
|
+
output_key (str): The key used to store the output of the action.
|
23
|
+
ruleset (Optional[RuleSet]): The ruleset to be used for censoring the article.
|
24
|
+
"""
|
25
|
+
|
26
|
+
output_key: str = "rag_tweaked_article"
|
27
|
+
"""The key used to store the output of the action."""
|
28
|
+
|
29
|
+
ruleset: Optional[RuleSet] = None
|
30
|
+
"""The ruleset to be used for censoring the article."""
|
31
|
+
|
32
|
+
async def _execute(
|
33
|
+
self,
|
34
|
+
article: Article,
|
35
|
+
collection_name: str = "article_essence",
|
36
|
+
ruleset: Optional[RuleSet] = None,
|
37
|
+
parallel: bool = False,
|
38
|
+
**cxt,
|
39
|
+
) -> Optional[Article]:
|
40
|
+
"""Write an article based on the provided outline.
|
41
|
+
|
42
|
+
This method processes the article outline, either in parallel or sequentially, by enhancing each subsection
|
43
|
+
with relevant references and applying censoring rules.
|
26
44
|
|
45
|
+
Args:
|
46
|
+
article (Article): The article to be processed.
|
47
|
+
collection_name (str): The name of the collection to view for processing.
|
48
|
+
ruleset (Optional[RuleSet]): The ruleset to apply for censoring. If not provided, the class's ruleset is used.
|
49
|
+
parallel (bool): If True, process subsections in parallel. Otherwise, process them sequentially.
|
50
|
+
**cxt: Additional context parameters.
|
27
51
|
|
28
|
-
|
29
|
-
|
52
|
+
Returns:
|
53
|
+
Optional[Article]: The processed article with enhanced subsections and applied censoring rules.
|
54
|
+
"""
|
55
|
+
self.view(collection_name)
|
30
56
|
|
31
|
-
|
57
|
+
if parallel:
|
58
|
+
await gather(
|
59
|
+
*[
|
60
|
+
self._inner(article, subsec, ok(ruleset or self.ruleset, "No ruleset provided!"))
|
61
|
+
for _, __, subsec in article.iter_subsections()
|
62
|
+
],
|
63
|
+
return_exceptions=True,
|
64
|
+
)
|
65
|
+
else:
|
66
|
+
for _, __, subsec in article.iter_subsections():
|
67
|
+
await self._inner(article, subsec, ok(ruleset or self.ruleset, "No ruleset provided!"))
|
68
|
+
return article
|
32
69
|
|
33
|
-
async def
|
34
|
-
"""
|
35
|
-
|
70
|
+
async def _inner(self, article: Article, subsec: ArticleSubsection, ruleset: RuleSet) -> None:
|
71
|
+
"""Enhance a subsection of the article with references and apply censoring rules.
|
72
|
+
|
73
|
+
This method refines the query for the subsection, retrieves related references, and applies censoring rules
|
74
|
+
to the subsection's paragraphs.
|
75
|
+
|
76
|
+
Args:
|
77
|
+
article (Article): The article containing the subsection.
|
78
|
+
subsec (ArticleSubsection): The subsection to be enhanced.
|
79
|
+
ruleset (RuleSet): The ruleset to apply for censoring.
|
80
|
+
|
81
|
+
Returns:
|
82
|
+
None
|
83
|
+
"""
|
84
|
+
refind_q = ok(
|
85
|
+
await self.arefined_query(
|
86
|
+
f"{article.referenced.as_prompt()}\n"
|
87
|
+
f"# Subsection requiring reference enhancement\n"
|
88
|
+
f"{subsec.display()}\n"
|
89
|
+
f"# Requirement\n"
|
90
|
+
f"Search related articles in the base to find reference candidates, "
|
91
|
+
f"prioritizing both original article language and English usage",
|
92
|
+
)
|
93
|
+
)
|
94
|
+
patch = ArticleParagraphSequencePatch.default()
|
95
|
+
patch.tweaked = subsec.paragraphs
|
96
|
+
await self.censor_obj_inplace(
|
97
|
+
patch,
|
98
|
+
ruleset=ruleset,
|
99
|
+
reference=await self.aretrieve_compact(refind_q, final_limit=30),
|
100
|
+
)
|
fabricatio/actions/output.py
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
"""Dump the finalized output to a file."""
|
2
2
|
|
3
3
|
from pathlib import Path
|
4
|
-
from typing import Optional
|
4
|
+
from typing import Iterable, List, Optional, Type
|
5
5
|
|
6
|
+
from fabricatio.journal import logger
|
6
7
|
from fabricatio.models.action import Action
|
7
|
-
from fabricatio.models.generic import FinalizedDumpAble
|
8
|
+
from fabricatio.models.generic import FinalizedDumpAble, PersistentAble
|
8
9
|
from fabricatio.models.task import Task
|
9
|
-
from fabricatio.
|
10
|
+
from fabricatio.utils import ok
|
10
11
|
|
11
12
|
|
12
13
|
class DumpFinalizedOutput(Action):
|
13
14
|
"""Dump the finalized output to a file."""
|
14
15
|
|
15
16
|
output_key: str = "dump_path"
|
17
|
+
dump_path: Optional[str] = None
|
16
18
|
|
17
19
|
async def _execute(
|
18
20
|
self,
|
@@ -23,6 +25,7 @@ class DumpFinalizedOutput(Action):
|
|
23
25
|
) -> str:
|
24
26
|
dump_path = Path(
|
25
27
|
dump_path
|
28
|
+
or self.dump_path
|
26
29
|
or ok(
|
27
30
|
await self.awhich_pathstr(
|
28
31
|
f"{ok(task_input, 'Neither `task_input` and `dump_path` is provided.').briefing}\n\nExtract a single path of the file, to which I will dump the data."
|
@@ -32,3 +35,68 @@ class DumpFinalizedOutput(Action):
|
|
32
35
|
)
|
33
36
|
ok(to_dump, "Could not dump the data since the path is not specified.").finalized_dump_to(dump_path)
|
34
37
|
return dump_path.as_posix()
|
38
|
+
|
39
|
+
|
40
|
+
class PersistentAll(Action):
|
41
|
+
"""Persist all the data to a file."""
|
42
|
+
|
43
|
+
output_key: str = "persistent_count"
|
44
|
+
persist_dir: Optional[str] = None
|
45
|
+
|
46
|
+
async def _execute(
|
47
|
+
self,
|
48
|
+
task_input: Optional[Task] = None,
|
49
|
+
persist_dir: Optional[str | Path] = None,
|
50
|
+
**cxt,
|
51
|
+
) -> int:
|
52
|
+
persist_dir = Path(
|
53
|
+
persist_dir
|
54
|
+
or self.persist_dir
|
55
|
+
or ok(
|
56
|
+
await self.awhich_pathstr(
|
57
|
+
f"{ok(task_input, 'Neither `task_input` and `dump_path` is provided.').briefing}\n\nExtract a single path of the file, to which I will persist the data."
|
58
|
+
),
|
59
|
+
"Can not find the path of file to persist the data.",
|
60
|
+
)
|
61
|
+
)
|
62
|
+
|
63
|
+
count = 0
|
64
|
+
if persist_dir.is_file():
|
65
|
+
logger.warning("Dump should be a directory, but it is a file. Skip dumping.")
|
66
|
+
return count
|
67
|
+
|
68
|
+
for k, v in cxt.items():
|
69
|
+
final_dir = persist_dir.joinpath(k)
|
70
|
+
if isinstance(v, PersistentAble):
|
71
|
+
final_dir.mkdir(parents=True, exist_ok=True)
|
72
|
+
v.persist(final_dir)
|
73
|
+
count += 1
|
74
|
+
if isinstance(v, Iterable) and any(
|
75
|
+
persistent_ables := (pers for pers in v if isinstance(pers, PersistentAble))
|
76
|
+
):
|
77
|
+
final_dir.mkdir(parents=True, exist_ok=True)
|
78
|
+
for per in persistent_ables:
|
79
|
+
per.persist(final_dir)
|
80
|
+
count += 1
|
81
|
+
logger.info(f"Persisted {count} objects to {persist_dir}")
|
82
|
+
return count
|
83
|
+
|
84
|
+
|
85
|
+
class RetrieveFromPersistent[T: PersistentAble](Action):
|
86
|
+
"""Retrieve the object from the persistent file."""
|
87
|
+
|
88
|
+
output_key: str = "retrieved_obj"
|
89
|
+
"""Retrieve the object from the persistent file."""
|
90
|
+
load_path: str
|
91
|
+
"""The path of the persistent file or directory contains multiple file."""
|
92
|
+
retrieve_cls: Type[T]
|
93
|
+
"""The class of the object to retrieve."""
|
94
|
+
|
95
|
+
async def _execute(self, /, **__) -> Optional[T | List[T]]:
|
96
|
+
logger.info(f"Retrieve `{self.retrieve_cls.__name__}` from persistent file: {self.load_path}")
|
97
|
+
if not (p := Path(self.load_path)).exists():
|
98
|
+
return None
|
99
|
+
|
100
|
+
if p.is_dir():
|
101
|
+
return [self.retrieve_cls.from_persistent(per) for per in p.glob("*")]
|
102
|
+
return self.retrieve_cls.from_persistent(self.load_path)
|