fabricatio 0.2.11.dev3__cp312-cp312-manylinux_2_34_x86_64.whl → 0.2.12__cp312-cp312-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/actions/article_rag.py +172 -71
- fabricatio/actions/output.py +43 -2
- fabricatio/capabilities/advanced_rag.py +56 -0
- fabricatio/capabilities/rag.py +4 -4
- fabricatio/config.py +3 -3
- fabricatio/fs/curd.py +1 -1
- fabricatio/models/action.py +6 -8
- fabricatio/models/extra/aricle_rag.py +42 -19
- fabricatio/models/extra/article_base.py +79 -37
- fabricatio/models/extra/article_main.py +89 -45
- fabricatio/models/extra/article_outline.py +41 -3
- fabricatio/models/generic.py +10 -6
- fabricatio/models/kwargs_types.py +1 -1
- fabricatio/models/role.py +5 -4
- fabricatio/rust.cpython-312-x86_64-linux-gnu.so +0 -0
- fabricatio/rust.pyi +34 -1
- fabricatio/utils.py +1 -1
- fabricatio-0.2.12.data/scripts/tdown +0 -0
- {fabricatio-0.2.11.dev3.dist-info → fabricatio-0.2.12.dist-info}/METADATA +1 -1
- {fabricatio-0.2.11.dev3.dist-info → fabricatio-0.2.12.dist-info}/RECORD +22 -21
- fabricatio-0.2.11.dev3.data/scripts/tdown +0 -0
- {fabricatio-0.2.11.dev3.dist-info → fabricatio-0.2.12.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.11.dev3.dist-info → fabricatio-0.2.12.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,10 @@ from asyncio import gather
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import List, Optional
|
6
6
|
|
7
|
+
from pydantic import Field, PositiveInt
|
8
|
+
|
7
9
|
from fabricatio import BibManager
|
10
|
+
from fabricatio.capabilities.advanced_rag import AdvancedRAG
|
8
11
|
from fabricatio.capabilities.censor import Censor
|
9
12
|
from fabricatio.capabilities.extract import Extract
|
10
13
|
from fabricatio.capabilities.rag import RAG
|
@@ -16,59 +19,63 @@ from fabricatio.models.extra.article_essence import ArticleEssence
|
|
16
19
|
from fabricatio.models.extra.article_main import Article, ArticleChapter, ArticleSection, ArticleSubsection
|
17
20
|
from fabricatio.models.extra.article_outline import ArticleOutline
|
18
21
|
from fabricatio.models.extra.rule import RuleSet
|
22
|
+
from fabricatio.models.kwargs_types import ChooseKwargs, LLMKwargs
|
19
23
|
from fabricatio.utils import ask_retain, ok
|
20
24
|
|
25
|
+
TYPST_CITE_USAGE = (
|
26
|
+
"citation number is REQUIRED to cite any reference!,for example in Auther Pattern: 'Doe et al.[[1]], Jack et al.[[2]]' or in Sentence Suffix Sattern: 'Global requirement is incresing[[1]].'\n"
|
27
|
+
"Everything is build upon the typst language, which is similar to latex, \n"
|
28
|
+
"Legal citing syntax examples(seperated by |): [[1]]|[[1,2]]|[[1-3]]|[[12,13-15]]|[[1-3,5-7]]\n"
|
29
|
+
"Illegal citing syntax examples(seperated by |): [[1],[2],[3]]|[[1],[1-2]]\n"
|
30
|
+
"Those reference mark shall not be omitted during the extraction\n"
|
31
|
+
"It's recommended to cite multiple references that supports your conclusion at a time.\n"
|
32
|
+
"Wrap inline expression with '\\(' and '\\)',like '\\(>5m\\)' '\\(89%\\)', and wrap block equation with '\\[' and '\\]'.\n"
|
33
|
+
"In addition to that, you can add a label outside the block equation which can be used as a cross reference identifier, the label is a string wrapped in `<` and `>` like `<energy-release-rate-equation>`.Note that the label string should be a summarizing title for the equation being labeled.\n"
|
34
|
+
"you can refer to that label by using the syntax with prefix of `@eqt:`, which indicate that this notation is citing a label from the equations. For example ' @eqt:energy-release-rate-equation ' DO remember that the notation shall have both suffixed and prefixed space char which enable the compiler to distinguish the notation from the plaintext."
|
35
|
+
"Below is two usage example:\n"
|
36
|
+
"```typst\n"
|
37
|
+
"See @eqt:mass-energy-equation , it's the foundation of physics.\n"
|
38
|
+
"\\[\n"
|
39
|
+
"E = m c^2\n"
|
40
|
+
"\\] <mass-energy-equation>\n\n\n"
|
41
|
+
"In @eqt:mass-energy-equation , \\(m\\) stands for mass, \\(c\\) stands for speed of light, and \\(E\\) stands for energy. \n"
|
42
|
+
"```\n"
|
43
|
+
)
|
44
|
+
|
21
45
|
|
22
46
|
class WriteArticleContentRAG(Action, RAG, Extract):
|
23
47
|
"""Write an article based on the provided outline."""
|
24
48
|
|
49
|
+
search_increment_multiplier: float = 1.6
|
50
|
+
"""The increment multiplier of the search increment."""
|
25
51
|
ref_limit: int = 35
|
26
52
|
"""The limit of references to be retrieved"""
|
27
|
-
threshold: float = 0.
|
53
|
+
threshold: float = 0.62
|
28
54
|
"""The threshold of relevance"""
|
29
|
-
extractor_model:
|
55
|
+
extractor_model: LLMKwargs
|
30
56
|
"""The model to use for extracting the content from the retrieved references."""
|
31
|
-
query_model:
|
57
|
+
query_model: LLMKwargs
|
32
58
|
"""The model to use for querying the database"""
|
33
59
|
supervisor: bool = False
|
34
60
|
"""Whether to use supervisor mode"""
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
"Illegal citing syntax examples(seperated by |): [[1],[2],[3]]|[[1],[1-2]]\n"
|
40
|
-
"Those reference mark shall not be omitted during the extraction\n"
|
41
|
-
"It's recommended to cite multiple references that supports your conclusion at a time.\n"
|
42
|
-
"Wrapp inline expression using $ $, and wrapp block equation using $$ $$."
|
43
|
-
"In addition to that, you can add a label outside the block equation which can be used as a cross reference identifier, the label is a string wrapped in `<` and `>`,"
|
44
|
-
"you can refer to that label by using the syntax with prefix of `@eqt:`"
|
45
|
-
"Below is a usage example:\n"
|
46
|
-
"```typst\n"
|
47
|
-
"See @eqt:mass-energy-equation , it's the foundation of physics.\n"
|
48
|
-
"$$\n"
|
49
|
-
"E = m c^2"
|
50
|
-
"$$\n"
|
51
|
-
"<mass-energy-equation>\n\n"
|
52
|
-
"In @eqt:mass-energy-equation , $m$ stands for mass, $c$ stands for speed of light, and $E$ stands for energy. \n"
|
53
|
-
"```"
|
54
|
-
)
|
61
|
+
result_per_query: PositiveInt = 4
|
62
|
+
"""The number of results to be returned per query."""
|
63
|
+
req: str = TYPST_CITE_USAGE
|
64
|
+
"""The req of the write article content."""
|
55
65
|
|
56
66
|
async def _execute(
|
57
67
|
self,
|
58
68
|
article_outline: ArticleOutline,
|
59
|
-
collection_name: str =
|
69
|
+
collection_name: Optional[str] = None,
|
60
70
|
supervisor: Optional[bool] = None,
|
61
71
|
**cxt,
|
62
72
|
) -> Article:
|
63
73
|
article = Article.from_outline(article_outline).update_ref(article_outline)
|
64
|
-
|
74
|
+
self.target_collection = collection_name or self.safe_target_collection
|
65
75
|
if supervisor or (supervisor is None and self.supervisor):
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
for chap, sec, subsec in article.iter_subsections()
|
70
|
-
]
|
71
|
-
)
|
76
|
+
for chap, sec, subsec in article.iter_subsections():
|
77
|
+
await self._supervisor_inner(article, article_outline, chap, sec, subsec)
|
78
|
+
|
72
79
|
else:
|
73
80
|
await gather(
|
74
81
|
*[
|
@@ -92,22 +99,29 @@ class WriteArticleContentRAG(Action, RAG, Extract):
|
|
92
99
|
from questionary import confirm, text
|
93
100
|
from rich import print as r_print
|
94
101
|
|
95
|
-
|
96
|
-
|
97
|
-
cm = CitationManager(article_chunks=await ask_retain([r.chunk for r in ret], ret)).set_cite_number_all()
|
102
|
+
cm = CitationManager()
|
103
|
+
await self.search_database(article, article_outline, chap, sec, subsec, cm)
|
98
104
|
|
99
105
|
raw = await self.write_raw(article, article_outline, chap, sec, subsec, cm)
|
100
106
|
r_print(raw)
|
101
107
|
|
102
108
|
while not await confirm("Accept this version and continue?").ask_async():
|
103
|
-
if await
|
104
|
-
|
105
|
-
|
109
|
+
if inst := await text("Search for more refs for additional spec.").ask_async():
|
110
|
+
await self.search_database(
|
111
|
+
article,
|
112
|
+
article_outline,
|
113
|
+
chap,
|
114
|
+
sec,
|
115
|
+
subsec,
|
116
|
+
cm,
|
117
|
+
supervisor=True,
|
118
|
+
extra_instruction=inst,
|
119
|
+
)
|
106
120
|
|
107
|
-
instruction
|
108
|
-
|
109
|
-
if await
|
110
|
-
raw =
|
121
|
+
if instruction := await text("Enter the instructions to improve").ask_async():
|
122
|
+
raw = await self.write_raw(article, article_outline, chap, sec, subsec, cm, instruction)
|
123
|
+
if edt := await text("Edit", default=raw).ask_async():
|
124
|
+
raw = edt
|
111
125
|
|
112
126
|
r_print(raw)
|
113
127
|
|
@@ -121,11 +135,14 @@ class WriteArticleContentRAG(Action, RAG, Extract):
|
|
121
135
|
sec: ArticleSection,
|
122
136
|
subsec: ArticleSubsection,
|
123
137
|
) -> ArticleSubsection:
|
124
|
-
|
125
|
-
|
138
|
+
cm = CitationManager()
|
139
|
+
|
140
|
+
await self.search_database(article, article_outline, chap, sec, subsec, cm)
|
126
141
|
|
127
142
|
raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm)
|
128
143
|
|
144
|
+
raw_paras = "\n".join(p for p in raw_paras.splitlines() if p and not p.endswith("**") and not p.startswith("#"))
|
145
|
+
|
129
146
|
return await self.extract_new_subsec(subsec, raw_paras, cm)
|
130
147
|
|
131
148
|
async def extract_new_subsec(
|
@@ -139,11 +156,13 @@ class WriteArticleContentRAG(Action, RAG, Extract):
|
|
139
156
|
f"Above is the subsection titled `{subsec.title}`.\n"
|
140
157
|
f"I need you to extract the content to update my subsection obj provided below.\n{self.req}"
|
141
158
|
f"{subsec.display()}\n",
|
159
|
+
**self.extractor_model,
|
142
160
|
),
|
143
161
|
"Failed to propose new subsection.",
|
144
162
|
)
|
145
163
|
for p in new_subsec.paragraphs:
|
146
|
-
p.content = cm.apply(p.content)
|
164
|
+
p.content = cm.apply(p.content)
|
165
|
+
p.description = cm.apply(p.description)
|
147
166
|
subsec.update_from(new_subsec)
|
148
167
|
logger.debug(f"{subsec.title}:rpl\n{subsec.display()}")
|
149
168
|
return subsec
|
@@ -159,23 +178,16 @@ class WriteArticleContentRAG(Action, RAG, Extract):
|
|
159
178
|
extra_instruction: str = "",
|
160
179
|
) -> str:
|
161
180
|
"""Write the raw paragraphs of the subsec."""
|
162
|
-
return (
|
163
|
-
(
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
)
|
173
|
-
.replace(r" \( ", "$")
|
174
|
-
.replace(r" \) ", "$")
|
175
|
-
.replace(r"\(", "$")
|
176
|
-
.replace(r"\)", "$")
|
177
|
-
.replace("\\[\n", "$$\n")
|
178
|
-
.replace("\n\\]", "\n$$")
|
181
|
+
return await self.aask(
|
182
|
+
f"{cm.as_prompt()}\nAbove is some related reference from other auther retrieved for you."
|
183
|
+
f"{article_outline.finalized_dump()}\n\nAbove is my article outline, I m writing graduate thesis titled `{article.title}`. "
|
184
|
+
f"More specifically, i m witting the Chapter `{chap.title}` >> Section `{sec.title}` >> Subsection `{subsec.title}`.\n"
|
185
|
+
f"Please help me write the paragraphs of the subsec mentioned above, which is `{subsec.title}`.\n"
|
186
|
+
f"{self.req}\n"
|
187
|
+
f"You SHALL use `{article.language}` as writing language.\n{extra_instruction}\n"
|
188
|
+
f"Do not use numbered list to display the outcome, you should regard you are writing the main text of the thesis.\n"
|
189
|
+
f"You should not copy others' works from the references directly on to my thesis, we can only harness the conclusion they have drawn.\n"
|
190
|
+
f"No extra explanation is allowed."
|
179
191
|
)
|
180
192
|
|
181
193
|
async def search_database(
|
@@ -185,28 +197,117 @@ class WriteArticleContentRAG(Action, RAG, Extract):
|
|
185
197
|
chap: ArticleChapter,
|
186
198
|
sec: ArticleSection,
|
187
199
|
subsec: ArticleSubsection,
|
200
|
+
cm: CitationManager,
|
188
201
|
extra_instruction: str = "",
|
189
202
|
supervisor: bool = False,
|
190
|
-
) ->
|
203
|
+
) -> None:
|
191
204
|
"""Search database for related references."""
|
205
|
+
search_req = (
|
206
|
+
f"{article_outline.finalized_dump()}\n\nAbove is my article outline, I m writing graduate thesis titled `{article.title}`. "
|
207
|
+
f"More specifically, i m witting the Chapter `{chap.title}` >> Section `{sec.title}` >> Subsection `{subsec.title}`.\n"
|
208
|
+
f"I need to search related references to build up the content of the subsec mentioned above, which is `{subsec.title}`.\n"
|
209
|
+
f"provide 10~16 queries as possible, to get best result!\n"
|
210
|
+
f"You should provide both English version and chinese version of the refined queries!\n{extra_instruction}\n"
|
211
|
+
)
|
212
|
+
|
192
213
|
ref_q = ok(
|
193
214
|
await self.arefined_query(
|
194
|
-
|
195
|
-
|
196
|
-
f"I need to search related references to build up the content of the subsec mentioned above, which is `{subsec.title}`.\n"
|
197
|
-
f"provide 10~16 queries as possible, to get best result!\n"
|
198
|
-
f"You should provide both English version and chinese version of the refined queries!\n{extra_instruction}\n",
|
199
|
-
model=self.query_model,
|
215
|
+
search_req,
|
216
|
+
**self.query_model,
|
200
217
|
),
|
201
218
|
"Failed to refine query.",
|
202
219
|
)
|
203
220
|
|
221
|
+
if supervisor:
|
222
|
+
ref_q = await ask_retain(ref_q)
|
223
|
+
ret = await self.aretrieve(
|
224
|
+
ref_q,
|
225
|
+
ArticleChunk,
|
226
|
+
max_accepted=self.ref_limit,
|
227
|
+
result_per_query=self.result_per_query,
|
228
|
+
similarity_threshold=self.threshold,
|
229
|
+
)
|
230
|
+
|
231
|
+
cm.add_chunks(ok(ret))
|
232
|
+
ref_q = await self.arefined_query(
|
233
|
+
f"{cm.as_prompt()}\n\nAbove is the retrieved references in the first RAG, now we need to perform the second RAG.\n\n{search_req}",
|
234
|
+
**self.query_model,
|
235
|
+
)
|
236
|
+
|
237
|
+
if ref_q is None:
|
238
|
+
logger.warning("Second refine query is None, skipping.")
|
239
|
+
return
|
204
240
|
if supervisor:
|
205
241
|
ref_q = await ask_retain(ref_q)
|
206
242
|
|
207
|
-
|
208
|
-
ref_q,
|
243
|
+
ret = await self.aretrieve(
|
244
|
+
ref_q,
|
245
|
+
ArticleChunk,
|
246
|
+
max_accepted=int(self.ref_limit * self.search_increment_multiplier),
|
247
|
+
result_per_query=int(self.result_per_query * self.search_increment_multiplier),
|
248
|
+
similarity_threshold=self.threshold,
|
209
249
|
)
|
250
|
+
if ret is None:
|
251
|
+
logger.warning("Second retrieve is None, skipping.")
|
252
|
+
return
|
253
|
+
cm.add_chunks(ret)
|
254
|
+
|
255
|
+
|
256
|
+
class ArticleConsultRAG(Action, AdvancedRAG):
    """Answer the user's questions interactively, grounding each reply on references retrieved from the article database."""

    # `_execute` returns the number of finished conversation rounds; this is the key chosen for that value.
    output_key: str = "consult_count"
    search_increment_multiplier: float = 1.6
    """The multiplier to increase the limit of references to retrieve per query."""
    ref_limit: int = 20
    """The final limit of references."""
    ref_per_q: int = 3
    """The limit of references to retrieve per query."""
    similarity_threshold: float = 0.62
    """The similarity threshold of references to retrieve."""
    ref_q_model: ChooseKwargs = Field(default_factory=ChooseKwargs)
    """The model to use for refining query."""
    req: str = TYPST_CITE_USAGE
    """The request for the rag model."""

    @precheck_package(
        "questionary", "`questionary` is required for supervisor mode, please install it by `fabricatio[qa]`"
    )
    async def _execute(self, collection_name: Optional[str] = None, **cxt) -> int:
        """Run an interactive question/answer loop backed by multi-round retrieval.

        Each round reads a question from the terminal, optionally clears the
        citation manager, runs `clued_search` to collect references, asks the
        LLM with those references plus `self.req`, post-processes the reply and
        prints it.

        Args:
            collection_name (Optional[str]): The collection to search; falls back to
                `self.safe_target_collection` when not given.
            **cxt: Extra context values, unused here.

        Returns:
            int: The number of conversation rounds that were completed.
        """
        from questionary import confirm, text
        from rich import print as r_print

        from fabricatio.rust import convert_all_block_tex, convert_all_inline_tex, fix_misplaced_labels

        self.target_collection = collection_name or self.safe_target_collection

        # One citation manager is shared by all rounds, so references accumulate
        # unless the user chooses to empty it.
        cm = CitationManager()

        counter = 0
        # NOTE(review): the loop ends when the prompt yields None — presumably when the
        # user cancels the prompt; confirm against questionary's `ask_async` behavior.
        while (req := await text("User: ").ask_async()) is not None:
            if await confirm("Empty the cm?").ask_async():
                cm.empty()
            await self.clued_search(
                req,
                cm,
                refinery_kwargs=self.ref_q_model,
                expand_multiplier=self.search_increment_multiplier,
                base_accepted=self.ref_limit,
                result_per_query=self.ref_per_q,
                similarity_threshold=self.similarity_threshold,
            )

            ret = await self.aask(f"{cm.as_prompt()}\n{self.req}\n{req}")

            # Post-process the raw reply: labels first, then inline and block
            # TeX conversion, and finally the citation manager's own pass.
            ret = fix_misplaced_labels(ret)
            ret = convert_all_inline_tex(ret)
            ret = convert_all_block_tex(ret)
            ret = cm.apply(ret)

            r_print(ret)
            counter += 1
        logger.info(f"{counter} rounds of conversation.")
        return counter
|
210
311
|
|
211
312
|
|
212
313
|
class TweakArticleRAG(Action, RAG, Censor):
|
@@ -290,7 +391,7 @@ class TweakArticleRAG(Action, RAG, Censor):
|
|
290
391
|
await self.censor_obj_inplace(
|
291
392
|
subsec,
|
292
393
|
ruleset=ruleset,
|
293
|
-
reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence,
|
394
|
+
reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, max_accepted=self.ref_limit))}\n\n"
|
294
395
|
f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
|
295
396
|
f"You should Always use `{subsec.language}` as written language, "
|
296
397
|
f"which is the original language of the `{subsec.title}`. "
|
fabricatio/actions/output.py
CHANGED
@@ -3,14 +3,17 @@
|
|
3
3
|
from pathlib import Path
|
4
4
|
from typing import Any, Iterable, List, Mapping, Optional, Type
|
5
5
|
|
6
|
+
from fabricatio import TEMPLATE_MANAGER
|
7
|
+
from fabricatio.fs import dump_text
|
6
8
|
from fabricatio.journal import logger
|
7
9
|
from fabricatio.models.action import Action
|
8
10
|
from fabricatio.models.generic import FinalizedDumpAble, FromMapping, PersistentAble
|
9
11
|
from fabricatio.models.task import Task
|
12
|
+
from fabricatio.models.usages import LLMUsage
|
10
13
|
from fabricatio.utils import ok
|
11
14
|
|
12
15
|
|
13
|
-
class DumpFinalizedOutput(Action):
|
16
|
+
class DumpFinalizedOutput(Action, LLMUsage):
|
14
17
|
"""Dump the finalized output to a file."""
|
15
18
|
|
16
19
|
output_key: str = "dump_path"
|
@@ -38,7 +41,45 @@ class DumpFinalizedOutput(Action):
|
|
38
41
|
return dump_path.as_posix()
|
39
42
|
|
40
43
|
|
41
|
-
class
|
44
|
+
class RenderedDump(Action, LLMUsage):
    """Render the data to a file."""

    output_key: str = "dump_path"
    dump_path: Optional[str] = None

    template_name: str
    """The template name to render the data."""

    async def _execute(
        self,
        to_dump: FinalizedDumpAble,
        task_input: Optional[Task] = None,
        dump_path: Optional[str | Path] = None,
        **_,
    ) -> str:
        """Render `to_dump` through the configured template and write the result to a file.

        The destination is resolved in this order: the `dump_path` argument,
        then `self.dump_path`, then a path extracted via `awhich_pathstr` from
        the briefing of `task_input`.

        Args:
            to_dump (FinalizedDumpAble): The object whose finalized dump is rendered.
            task_input (Optional[Task]): The task whose briefing is used when no path is given.
            dump_path (Optional[str | Path]): Explicit destination path.
            **_: Ignored extra context values.

        Returns:
            str: The destination path in POSIX form.
        """
        chosen = dump_path or self.dump_path
        if not chosen:
            # No explicit path anywhere: extract one from the task briefing.
            chosen = ok(
                await self.awhich_pathstr(
                    f"{ok(task_input, 'Neither `task_input` and `dump_path` is provided.').briefing}\n\nExtract a single path of the file, to which I will dump the data."
                ),
                "Could not find the path of file to dump the data.",
            )
        destination = Path(chosen)

        logger.info(f"Saving output to {destination.as_posix()}")
        # The template receives one entry, keyed by the class name of the dumped object.
        template_data = {to_dump.__class__.__name__: to_dump.finalized_dump()}
        rendered = TEMPLATE_MANAGER.render_template(self.template_name, template_data)
        dump_text(destination, rendered)

        return destination.as_posix()
|
80
|
+
|
81
|
+
|
82
|
+
class PersistentAll(Action, LLMUsage):
|
42
83
|
"""Persist all the data to a file."""
|
43
84
|
|
44
85
|
output_key: str = "persistent_count"
|
@@ -0,0 +1,56 @@
|
|
1
|
+
"""Advanced RAG (Retrieval Augmented Generation) model."""
|
2
|
+
|
3
|
+
from typing import Optional, Unpack
|
4
|
+
|
5
|
+
from fabricatio.capabilities.rag import RAG
|
6
|
+
from fabricatio.journal import logger
|
7
|
+
from fabricatio.models.adv_kwargs_types import FetchKwargs
|
8
|
+
from fabricatio.models.extra.aricle_rag import ArticleChunk, CitationManager
|
9
|
+
from fabricatio.models.kwargs_types import ChooseKwargs
|
10
|
+
|
11
|
+
|
12
|
+
class AdvancedRAG(RAG):
    """A class representing the Advanced RAG (Retrieval Augmented Generation) model."""

    async def clued_search(
        self,
        requirement: str,
        cm: CitationManager,
        max_capacity: int = 40,
        max_round: int = 3,
        expand_multiplier: float = 1.4,
        base_accepted: int = 12,
        refinery_kwargs: Optional[ChooseKwargs] = None,
        **kwargs: Unpack[FetchKwargs],
    ) -> CitationManager:
        """Asynchronously performs a clued search based on a given requirement and citation manager.

        Each round refines the query using the references already held by `cm`,
        retrieves more chunks, and adds them to `cm`. The search stops when the
        query cannot be refined, when the capacity is used up, or after
        `max_round` rounds.

        Args:
            requirement (str): The requirement the references should serve.
            cm (CitationManager): The citation manager that accumulates retrieved chunks; modified in place.
            max_capacity (int): The maximum number of chunks added across all rounds.
            max_round (int): The number of refine-and-retrieve rounds; must be at least 1.
            expand_multiplier (float): The factor applied to `base_accepted` after each round.
            base_accepted (int): The accepted-result limit passed to `aretrieve` in the first round.
            refinery_kwargs (Optional[ChooseKwargs]): Keyword arguments forwarded to `arefined_query`.
            **kwargs (Unpack[FetchKwargs]): Keyword arguments forwarded to `aretrieve`.

        Returns:
            CitationManager: The same `cm` that was passed in.

        Raises:
            ValueError: If `max_round` is not greater than 0.
        """
        if max_round <= 0:
            raise ValueError("max_round should be greater than 0")
        if max_round == 1:
            logger.warning(
                "max_round should be greater than 1, otherwise it behaves nothing different from the `self.aretrieve`"
            )

        refinery_kwargs = refinery_kwargs or {}
        # How many more chunks may still be added before the capacity is exhausted.
        remaining = max_capacity

        # Rounds are numbered 1..max_round. The previous `range(max_round + 1, 1)` was
        # always empty, so no search round was ever executed.
        for i in range(1, max_round + 1):
            logger.info(f"Round [{i}/{max_round}] search started.")
            ref_q = await self.arefined_query(
                f"{cm.as_prompt()}\n\nAbove is the retrieved references in the {i - 1}th RAG, now we need to perform the {i}th RAG."
                f"\n\n{requirement}",
                **refinery_kwargs,
            )
            if ref_q is None:
                logger.error(f"At round [{i}/{max_round}] search, failed to refine the query, exit.")
                return cm
            refs = await self.aretrieve(ref_q, ArticleChunk, base_accepted, **kwargs)

            if len(refs) > remaining:
                # Keep only as many chunks as still fit, then stop searching.
                cm.add_chunks(refs[:remaining])
                logger.debug(f"At round [{i}/{max_round}] search, the capacity is not enough, exit.")
                return cm

            cm.add_chunks(refs)
            remaining -= len(refs)
            # Widen the accepted-result limit for the next round.
            base_accepted = int(base_accepted * expand_multiplier)
        logger.debug(f"Exceeded max_round: {max_round}, exit.")
        return cm
|
fabricatio/capabilities/rag.py
CHANGED
@@ -189,7 +189,7 @@ class RAG(EmbeddingUsage):
|
|
189
189
|
self,
|
190
190
|
query: List[str] | str,
|
191
191
|
document_model: Type[D],
|
192
|
-
|
192
|
+
max_accepted: int = 20,
|
193
193
|
**kwargs: Unpack[FetchKwargs],
|
194
194
|
) -> List[D]:
|
195
195
|
"""Retrieve data from the collection.
|
@@ -197,7 +197,7 @@ class RAG(EmbeddingUsage):
|
|
197
197
|
Args:
|
198
198
|
query (List[str] | str): The query to be used for retrieval.
|
199
199
|
document_model (Type[D]): The model class used to convert retrieved data into document objects.
|
200
|
-
|
200
|
+
max_accepted (int): The final limit on the number of results to return.
|
201
201
|
**kwargs (Unpack[FetchKwargs]): Additional keyword arguments for retrieval.
|
202
202
|
|
203
203
|
Returns:
|
@@ -211,9 +211,9 @@ class RAG(EmbeddingUsage):
|
|
211
211
|
document_model=document_model,
|
212
212
|
**kwargs,
|
213
213
|
)
|
214
|
-
)[:
|
214
|
+
)[:max_accepted]
|
215
215
|
|
216
|
-
async def arefined_query(self, question: List[str] | str, **kwargs: Unpack[ChooseKwargs]) -> Optional[List[str]]:
|
216
|
+
async def arefined_query(self, question: List[str] | str, **kwargs: Unpack[ChooseKwargs[Optional[List[str]]]]) -> Optional[List[str]]:
|
217
217
|
"""Refines the given question using a template.
|
218
218
|
|
219
219
|
Args:
|
fabricatio/config.py
CHANGED
@@ -328,9 +328,9 @@ class RoutingConfig(BaseModel):
|
|
328
328
|
allowed_fails: Optional[int] = 3
|
329
329
|
"""The number of allowed fails before the routing is considered failed."""
|
330
330
|
retry_after: int = 15
|
331
|
-
"""
|
332
|
-
cooldown_time: Optional[int] =
|
333
|
-
"""
|
331
|
+
"""Minimum time to wait before retrying a failed request."""
|
332
|
+
cooldown_time: Optional[int] = 60
|
333
|
+
"""Time to cooldown a deployment after failure in seconds."""
|
334
334
|
|
335
335
|
|
336
336
|
class Settings(BaseSettings):
|
fabricatio/fs/curd.py
CHANGED
@@ -20,7 +20,7 @@ def dump_text(path: Union[str, Path], text: str) -> None:
|
|
20
20
|
Returns:
|
21
21
|
None
|
22
22
|
"""
|
23
|
-
Path(path).write_text(text, encoding="utf-8", errors="ignore")
|
23
|
+
Path(path).write_text(text, encoding="utf-8", errors="ignore", newline="\n")
|
24
24
|
|
25
25
|
|
26
26
|
def copy_file(src: Union[str, Path], dst: Union[str, Path]) -> None:
|
fabricatio/models/action.py
CHANGED
@@ -12,12 +12,12 @@ Classes:
|
|
12
12
|
import traceback
|
13
13
|
from abc import abstractmethod
|
14
14
|
from asyncio import Queue, create_task
|
15
|
-
from typing import Any, Dict, Self, Tuple, Type, Union, final
|
15
|
+
from typing import Any, Dict, Self, Sequence, Tuple, Type, Union, final
|
16
16
|
|
17
17
|
from fabricatio.journal import logger
|
18
18
|
from fabricatio.models.generic import WithBriefing
|
19
19
|
from fabricatio.models.task import Task
|
20
|
-
from fabricatio.models.usages import
|
20
|
+
from fabricatio.models.usages import ToolBoxUsage
|
21
21
|
from fabricatio.utils import override_kwargs
|
22
22
|
from pydantic import Field, PrivateAttr
|
23
23
|
|
@@ -26,7 +26,7 @@ OUTPUT_KEY = "task_output"
|
|
26
26
|
INPUT_KEY = "task_input"
|
27
27
|
|
28
28
|
|
29
|
-
class Action(WithBriefing
|
29
|
+
class Action(WithBriefing):
|
30
30
|
"""Class that represents an action to be executed in a workflow.
|
31
31
|
|
32
32
|
Actions are the atomic units of work in a workflow. Each action performs
|
@@ -96,9 +96,9 @@ class Action(WithBriefing, LLMUsage):
|
|
96
96
|
return f"## Your personality: \n{self.personality}\n# The action you are going to perform: \n{super().briefing}"
|
97
97
|
return f"# The action you are going to perform: \n{super().briefing}"
|
98
98
|
|
99
|
-
def to_task_output(self) -> Self:
|
99
|
+
def to_task_output(self, task_output_key: str = OUTPUT_KEY) -> Self:
|
100
100
|
"""Set the output key to OUTPUT_KEY and return the action instance."""
|
101
|
-
self.output_key =
|
101
|
+
self.output_key = task_output_key
|
102
102
|
return self
|
103
103
|
|
104
104
|
|
@@ -123,9 +123,7 @@ class WorkFlow(WithBriefing, ToolBoxUsage):
|
|
123
123
|
_instances: Tuple[Action, ...] = PrivateAttr(default_factory=tuple)
|
124
124
|
"""Instantiated action objects to be executed in this workflow."""
|
125
125
|
|
126
|
-
steps:
|
127
|
-
frozen=True,
|
128
|
-
)
|
126
|
+
steps: Sequence[Union[Type[Action], Action]] = Field(frozen=True)
|
129
127
|
"""The sequence of actions to be executed, can be action classes or instances."""
|
130
128
|
|
131
129
|
task_input_key: str = Field(default=INPUT_KEY)
|