fabricatio 0.3.15.dev5__cp313-cp313-win_amd64.whl → 0.4.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/__init__.py +9 -8
- fabricatio/actions/rules.py +83 -83
- fabricatio/rust.cp313-win_amd64.pyd +0 -0
- fabricatio/workflows/rag.py +2 -1
- fabricatio-0.4.0.data/scripts/tdown.exe +0 -0
- {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.0.dist-info}/METADATA +17 -16
- fabricatio-0.4.0.dist-info/RECORD +18 -0
- fabricatio/actions/article.py +0 -415
- fabricatio/actions/article_rag.py +0 -407
- fabricatio/capabilities/__init__.py +0 -1
- fabricatio/capabilities/advanced_judge.py +0 -20
- fabricatio/capabilities/advanced_rag.py +0 -61
- fabricatio/capabilities/censor.py +0 -105
- fabricatio/capabilities/check.py +0 -212
- fabricatio/capabilities/correct.py +0 -228
- fabricatio/capabilities/extract.py +0 -74
- fabricatio/capabilities/propose.py +0 -65
- fabricatio/capabilities/rag.py +0 -264
- fabricatio/capabilities/rating.py +0 -404
- fabricatio/capabilities/review.py +0 -114
- fabricatio/capabilities/task.py +0 -113
- fabricatio/decorators.py +0 -253
- fabricatio/emitter.py +0 -177
- fabricatio/fs/__init__.py +0 -35
- fabricatio/fs/curd.py +0 -153
- fabricatio/fs/readers.py +0 -61
- fabricatio/journal.py +0 -12
- fabricatio/models/action.py +0 -263
- fabricatio/models/adv_kwargs_types.py +0 -63
- fabricatio/models/extra/__init__.py +0 -1
- fabricatio/models/extra/advanced_judge.py +0 -32
- fabricatio/models/extra/aricle_rag.py +0 -286
- fabricatio/models/extra/article_base.py +0 -488
- fabricatio/models/extra/article_essence.py +0 -98
- fabricatio/models/extra/article_main.py +0 -285
- fabricatio/models/extra/article_outline.py +0 -45
- fabricatio/models/extra/article_proposal.py +0 -52
- fabricatio/models/extra/patches.py +0 -20
- fabricatio/models/extra/problem.py +0 -165
- fabricatio/models/extra/rag.py +0 -98
- fabricatio/models/extra/rule.py +0 -51
- fabricatio/models/generic.py +0 -904
- fabricatio/models/kwargs_types.py +0 -121
- fabricatio/models/role.py +0 -156
- fabricatio/models/task.py +0 -310
- fabricatio/models/tool.py +0 -328
- fabricatio/models/usages.py +0 -791
- fabricatio/parser.py +0 -114
- fabricatio/rust.pyi +0 -846
- fabricatio/utils.py +0 -156
- fabricatio/workflows/articles.py +0 -24
- fabricatio-0.3.15.dev5.data/scripts/tdown.exe +0 -0
- fabricatio-0.3.15.dev5.data/scripts/ttm.exe +0 -0
- fabricatio-0.3.15.dev5.dist-info/RECORD +0 -63
- {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.0.dist-info}/WHEEL +0 -0
- {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,407 +0,0 @@
|
|
1
|
-
"""A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
|
2
|
-
|
3
|
-
from asyncio import gather
|
4
|
-
from pathlib import Path
|
5
|
-
from typing import ClassVar, List, Optional
|
6
|
-
|
7
|
-
from pydantic import Field, PositiveInt
|
8
|
-
|
9
|
-
from fabricatio.capabilities.advanced_rag import AdvancedRAG
|
10
|
-
from fabricatio.capabilities.censor import Censor
|
11
|
-
from fabricatio.capabilities.extract import Extract
|
12
|
-
from fabricatio.capabilities.rag import RAG
|
13
|
-
from fabricatio.decorators import precheck_package
|
14
|
-
from fabricatio.journal import logger
|
15
|
-
from fabricatio.models.action import Action
|
16
|
-
from fabricatio.models.extra.aricle_rag import ArticleChunk, CitationManager
|
17
|
-
from fabricatio.models.extra.article_essence import ArticleEssence
|
18
|
-
from fabricatio.models.extra.article_main import Article, ArticleChapter, ArticleSection, ArticleSubsection
|
19
|
-
from fabricatio.models.extra.article_outline import ArticleOutline
|
20
|
-
from fabricatio.models.extra.rule import RuleSet
|
21
|
-
from fabricatio.models.kwargs_types import ChooseKwargs, LLMKwargs
|
22
|
-
from fabricatio.rust import (
|
23
|
-
BibManager,
|
24
|
-
convert_all_tex_math,
|
25
|
-
fix_misplaced_labels,
|
26
|
-
)
|
27
|
-
from fabricatio.utils import ok
|
28
|
-
|
29
|
-
# Prompt fragment describing the citation syntax the model must follow.
# Every instruction ends with a newline so they never fuse into one sentence.
TYPST_CITE_USAGE = (
    "citation number is REQUIRED to cite any reference!\n"
    "Legal citing syntax examples(seperated by |): [[1]]|[[1,2]]|[[1-3]]|[[12,13-15]]|[[1-3,5-7]]\n"
    "Illegal citing syntax examples(seperated by |): [[1],[2],[3]]|[[1],[1-2]]\n"
    "You SHALL not cite a single reference more than once!\n"
    "It's recommended to cite multiple references that supports your conclusion at a time.\n"
)
|
36
|
-
|
37
|
-
# Prompt fragment describing how math and equation labels must be written.
# The embedded example is fenced as a `typst` code block.
TYPST_MATH_USAGE = (
    "Wrap inline expression with '\\(' and '\\)',like '\\(>5m\\)' '\\(89%\\)', and wrap block equation with '\\[' and '\\]'.\n"
    "In addition to that, you can add a label outside the block equation which can be used as a cross reference identifier, the label is a string wrapped in `<` and `>` like `<energy-release-rate-equation>`.Note that the label string should be a summarizing title for the equation being labeled and should never be written within the formula block.\n"
    # The trailing newline keeps this instruction from fusing with the example header below.
    "you can refer to that label by using the syntax with prefix of `@eqt:`, which indicate that this notation is citing a label from the equations. For example ' @eqt:energy-release-rate-equation ' DO remember that the notation shall have both suffixed and prefixed space char which enable the compiler to distinguish the notation from the plaintext.\n"
    "Below is two usage example:\n"
    "```typst\n"
    "See @eqt:mass-energy-equation , it's the foundation of physics.\n"
    "\\[\n"
    "E = m c^2\n"
    "\\] <mass-energy-equation>\n\n\n"
    "In @eqt:mass-energy-equation , \\(m\\) stands for mass, \\(c\\) stands for speed of light, and \\(E\\) stands for energy. \n"
    "```\n"
)
|
50
|
-
|
51
|
-
|
52
|
-
class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
    """Write an article based on the provided outline."""

    ctx_override: ClassVar[bool] = True
    search_increment_multiplier: float = 1.6
    """The increment multiplier of the search increment."""
    ref_limit: int = 35
    """The limit of references to be retrieved"""
    threshold: float = 0.62
    """The threshold of relevance"""
    extractor_model: LLMKwargs
    """The model to use for extracting the content from the retrieved references."""
    query_model: ChooseKwargs
    """The model to use for querying the database"""
    supervisor: bool = False
    """Whether to use supervisor mode"""
    result_per_query: PositiveInt = 4
    """The number of results to be returned per query."""
    cite_req: str = TYPST_CITE_USAGE
    """The citation requirement embedded in the writing prompt."""

    math_req: str = TYPST_MATH_USAGE
    """The math-notation requirement embedded in the writing prompt."""
    tei_endpoint: Optional[str] = None
    """Optional TEI endpoint forwarded to `clued_search`."""

    async def _execute(
        self,
        article_outline: ArticleOutline,
        collection_name: Optional[str] = None,
        supervisor: Optional[bool] = None,
        **cxt,
    ) -> Article:
        """Write every subsection of the article built from `article_outline`.

        Args:
            article_outline (ArticleOutline): The outline the article is created from.
            collection_name (Optional[str]): Collection to search; falls back to `self.safe_target_collection`.
            supervisor (Optional[bool]): Overrides `self.supervisor` when not None.
            **cxt: Additional context parameters (unused).

        Returns:
            Article: The written article after `convert_tex()`.
        """
        article = Article.from_outline(article_outline).update_ref(article_outline)
        self.target_collection = collection_name or self.safe_target_collection
        if supervisor or (supervisor is None and self.supervisor):
            # Supervisor mode is interactive, so subsections are handled one at a time.
            for chap, sec, subsec in article.iter_subsections():
                await self._supervisor_inner(article, article_outline, chap, sec, subsec)

        else:
            await gather(
                *[
                    self._inner(article, article_outline, chap, sec, subsec)
                    for chap, sec, subsec in article.iter_subsections()
                ]
            )
        return article.convert_tex()

    @precheck_package(
        "questionary", "`questionary` is required for supervisor mode, please install it by `fabricatio[qa]`"
    )
    async def _supervisor_inner(
        self,
        article: Article,
        article_outline: ArticleOutline,
        chap: ArticleChapter,
        sec: ArticleSection,
        subsec: ArticleSubsection,
    ) -> ArticleSubsection:
        """Write one subsection interactively, looping until the user accepts the draft.

        On each rejection the user may request an extra reference search, give
        rewrite instructions, and hand-edit the draft.
        """
        from questionary import confirm, text
        from rich import print as r_print

        cm = CitationManager()
        await self.search_database(article, article_outline, chap, sec, subsec, cm)

        raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm)
        r_print(raw_paras)

        while not await confirm("Accept this version and continue?").ask_async():
            if inst := await text("Search for more refs for additional spec.").ask_async():
                await self.search_database(article, article_outline, chap, sec, subsec, cm, extra_instruction=inst)

            if instruction := await text("Enter the instructions to improve").ask_async():
                raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm, instruction)
            if edt := await text("Edit", default=raw_paras).ask_async():
                raw_paras = edt

        raw_paras = fix_misplaced_labels(raw_paras)
        raw_paras = convert_all_tex_math(raw_paras)

        r_print(raw_paras)

        return await self.extract_new_subsec(subsec, raw_paras, cm)

    async def _inner(
        self,
        article: Article,
        article_outline: ArticleOutline,
        chap: ArticleChapter,
        sec: ArticleSection,
        subsec: ArticleSubsection,
    ) -> ArticleSubsection:
        """Write one subsection without user interaction."""
        cm = CitationManager()

        await self.search_database(article, article_outline, chap, sec, subsec, cm)

        raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm)

        # Drop blank lines, lines ending with `**` and lines starting with `#`.
        raw_paras = "\n".join(p for p in raw_paras.splitlines() if p and not p.endswith("**") and not p.startswith("#"))

        raw_paras = fix_misplaced_labels(raw_paras)
        raw_paras = convert_all_tex_math(raw_paras)

        return await self.extract_new_subsec(subsec, raw_paras, cm)

    async def extract_new_subsec(
        self, subsec: ArticleSubsection, raw_paras: str, cm: CitationManager
    ) -> ArticleSubsection:
        """Extract a structured subsection from `raw_paras` and merge it into `subsec`.

        Args:
            subsec (ArticleSubsection): The subsection updated in place.
            raw_paras (str): The raw text written for the subsection.
            cm (CitationManager): Applied to each paragraph's content and description.

        Returns:
            ArticleSubsection: The same `subsec` object after `update_from`.
        """
        new_subsec = ok(
            await self.extract(
                ArticleSubsection,
                raw_paras,
                f"Above is the subsection titled `{subsec.title}`.\n"
                # `ArticleSubsection` is a class: `__name__` is its own name,
                # whereas `__class__.__name__` would name its metaclass.
                f"I need you to extract the content to construct a new `{ArticleSubsection.__name__}`,"
                "Do not attempt to change the original content, your job is ONLY content extraction",
                **self.extractor_model,
            ),
            "Failed to propose new subsection.",
        )

        for p in new_subsec.paragraphs:
            p.content = cm.apply(p.content)
            p.description = cm.apply(p.description)
        subsec.update_from(new_subsec)
        logger.debug(f"{subsec.title}:rpl\n{subsec.display()}")
        return subsec

    async def write_raw(
        self,
        article: Article,
        article_outline: ArticleOutline,
        chap: ArticleChapter,
        sec: ArticleSection,
        subsec: ArticleSubsection,
        cm: CitationManager,
        extra_instruction: str = "",
    ) -> str:
        """Write the raw paragraphs of the subsec.

        The prompt is built from the collected references, the outline dump, the
        chapter/section/subsection titles, the citation and math requirements
        and `extra_instruction`.
        """
        return await self.aask(
            # The blank line separates the reference note from the outline dump that follows.
            f"{cm.as_prompt()}\nAbove is some related reference from other auther retrieved for you.\n\n"
            f"{article_outline.finalized_dump()}\n\nAbove is my article outline, I m writing graduate thesis titled `{article.title}`. "
            f"More specifically, i m witting the Chapter `{chap.title}` >> Section `{sec.title}` >> Subsection `{subsec.title}`.\n"
            f"Please help me write the paragraphs of the subsec mentioned above, which is `{subsec.title}`.\n"
            f"{self.cite_req}\n{self.math_req}\n"
            f"You SHALL use `{article.language}` as writing language.\n{extra_instruction}\n"
            "Do not use numbered list to display the outcome, you should regard you are writing the main text of the thesis.\n"
            "You should not copy others' works from the references directly on to my thesis, we can only harness the conclusion they have drawn.\n"
            "No extra explanation is allowed."
        )

    async def search_database(
        self,
        article: Article,
        article_outline: ArticleOutline,
        chap: ArticleChapter,
        sec: ArticleSection,
        subsec: ArticleSubsection,
        cm: CitationManager,
        extra_instruction: str = "",
    ) -> None:
        """Search database for related references.

        References are gathered into `cm` through `clued_search`, configured by
        this action's search fields.
        """
        search_req = (
            f"{article_outline.finalized_dump()}\n\nAbove is my article outline, I m writing graduate thesis titled `{article.title}`. "
            f"More specifically, i m witting the Chapter `{chap.title}` >> Section `{sec.title}` >> Subsection `{subsec.title}`.\n"
            f"I need to search related references to build up the content of the subsec mentioned above, which is `{subsec.title}`.\n"
            "provide 10~16 queries as possible, to get best result!\n"
            f"You should provide both English version and chinese version of the refined queries!\n{extra_instruction}"
        )

        await self.clued_search(
            search_req,
            cm,
            refinery_kwargs=self.query_model,
            expand_multiplier=self.search_increment_multiplier,
            base_accepted=self.ref_limit,
            result_per_query=self.result_per_query,
            similarity_threshold=self.threshold,
            tei_endpoint=self.tei_endpoint,
        )
|
231
|
-
|
232
|
-
|
233
|
-
class ArticleConsultRAG(Action, AdvancedRAG):
    """Answer the user's questions interactively, backed by retrieved article references.

    Each round reads a request from the terminal, searches the target collection
    for references, asks the model, and prints the answer.
    """

    ctx_override: ClassVar[bool] = True
    output_key: str = "consult_count"
    search_increment_multiplier: float = 1.6
    """The multiplier to increase the limit of references to retrieve per query."""
    ref_limit: int = 26
    """The final limit of references."""
    ref_per_q: int = 13
    """The limit of references to retrieve per query."""
    similarity_threshold: float = 0.62
    """The similarity threshold of references to retrieve."""
    ref_q_model: ChooseKwargs = Field(default_factory=ChooseKwargs)
    """The model to use for refining query."""
    req: str = TYPST_CITE_USAGE
    """The request for the rag model."""
    tei_endpoint: Optional[str] = None
    """Optional TEI endpoint forwarded to `clued_search`."""

    @precheck_package(
        "questionary", "`questionary` is required for supervisor mode, please install it by `fabricatio[qa]`"
    )
    async def _execute(self, collection_name: Optional[str] = None, **cxt) -> int:
        """Run the consult loop until the prompt returns None.

        Args:
            collection_name (Optional[str]): Collection to search; falls back to `self.safe_target_collection`.
            **cxt: Additional context parameters (unused).

        Returns:
            int: The number of completed conversation rounds.
        """
        from questionary import confirm, text
        from rich import print as r_print

        self.target_collection = collection_name or self.safe_target_collection

        # One manager is shared across rounds, so references accumulate until the user empties it.
        cm = CitationManager()

        counter = 0
        while (req := await text("User: ").ask_async()) is not None:
            if await confirm("Empty the cm?").ask_async():
                cm.empty()

            req = convert_all_tex_math(req)

            await self.clued_search(
                req,
                cm,
                refinery_kwargs=self.ref_q_model,
                expand_multiplier=self.search_increment_multiplier,
                base_accepted=self.ref_limit,
                result_per_query=self.ref_per_q,
                similarity_threshold=self.similarity_threshold,
                tei_endpoint=self.tei_endpoint,
            )

            ret = await self.aask(f"{cm.as_prompt()}\n{self.req}\n{req}")

            ret = fix_misplaced_labels(ret)
            ret = convert_all_tex_math(ret)
            ret = cm.apply(ret)

            r_print(ret)
            counter += 1
        logger.info(f"{counter} rounds of conversation.")
        return counter
|
291
|
-
|
292
|
-
|
293
|
-
class TweakArticleRAG(Action, RAG, Censor):
    """Tweak an existing article with retrieved references and a censoring ruleset.

    This class inherits from `Action`, `RAG`, and `Censor`. For every subsection it
    refines a query, retrieves related `ArticleEssence` documents, and censors the
    subsection in place against the ruleset, offering the documents as reference.

    Attributes:
        output_key (str): The key used to store the output of the action.
        ruleset (Optional[RuleSet]): The ruleset to be used for censoring the article.
        ref_limit (int): The limit of references to be retrieved.
    """

    output_key: str = "rag_tweaked_article"
    """The key used to store the output of the action."""

    ruleset: Optional[RuleSet] = None
    """The ruleset to be used for censoring the article."""

    ref_limit: int = 30
    """The limit of references to be retrieved"""

    async def _execute(
        self,
        article: Article,
        collection_name: str = "article_essence",
        twk_rag_ruleset: Optional[RuleSet] = None,
        parallel: bool = False,
        **cxt,
    ) -> Article:
        """Tweak every subsection of `article`, in parallel or sequentially.

        Args:
            article (Article): The article to be processed.
            collection_name (str): The name of the collection to view for processing.
            twk_rag_ruleset (Optional[RuleSet]): The ruleset to apply for censoring. If not provided, the class's ruleset is used.
            parallel (bool): If True, process subsections in parallel. Otherwise, process them sequentially.
            **cxt: Additional context parameters.

        Returns:
            Article: The same article object, with its subsections censored in place.

        Note:
            In parallel mode a failing subsection does not abort the others; each
            failure is logged. In sequential mode the first failure propagates.
        """
        self.view(collection_name)

        if parallel:
            outcomes = await gather(
                *[
                    self._inner(article, subsec, ok(twk_rag_ruleset or self.ruleset, "No ruleset provided!"))
                    for _, __, subsec in article.iter_subsections()
                ],
                return_exceptions=True,
            )
            # `return_exceptions=True` hands failures back as values; report them rather than dropping them.
            for outcome in outcomes:
                if isinstance(outcome, BaseException):
                    logger.error(f"Failed to tweak a subsection: {outcome!r}")
        else:
            for _, __, subsec in article.iter_subsections():
                await self._inner(article, subsec, ok(twk_rag_ruleset or self.ruleset, "No ruleset provided!"))
        return article

    async def _inner(self, article: Article, subsec: ArticleSubsection, ruleset: RuleSet) -> None:
        """Enhance a subsection of the article with references and apply censoring rules.

        Args:
            article (Article): The article containing the subsection.
            subsec (ArticleSubsection): The subsection to be enhanced.
            ruleset (RuleSet): The ruleset to apply for censoring.

        Returns:
            None
        """
        refind_q = ok(
            await self.arefined_query(
                f"{article.referenced.as_prompt()}\n# Subsection requiring reference enhancement\n{subsec.display()}\n"
            )
        )
        # Build the reference text first so the prompt below stays readable.
        retrieved = await self.aretrieve(refind_q, document_model=ArticleEssence, max_accepted=self.ref_limit)
        references = "\n\n".join(d.display() for d in retrieved)
        await self.censor_obj_inplace(
            subsec,
            ruleset=ruleset,
            reference=f"{references}\n\n"
            f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
            f"You should Always use `{subsec.language}` as written language, "
            f"which is the original language of the `{subsec.title}`. "
            f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
        )
|
380
|
-
|
381
|
-
|
382
|
-
class ChunkArticle(Action):
    """Chunk an article into smaller chunks."""

    output_key: str = "article_chunks"
    """The key used to store the output of the action."""
    max_chunk_size: Optional[int] = None
    """The maximum size of each chunk."""
    max_overlapping_rate: Optional[float] = None
    """The maximum overlapping rate between chunks."""

    async def _execute(
        self,
        article_path: str | Path,
        bib_manager: BibManager,
        max_chunk_size: Optional[int] = None,
        max_overlapping_rate: Optional[float] = None,
        **_,
    ) -> List[ArticleChunk]:
        """Split the article at `article_path` into chunks via `ArticleChunk.from_file`.

        Args:
            article_path (str | Path): Path of the article file to chunk.
            bib_manager (BibManager): Passed through to `ArticleChunk.from_file`.
            max_chunk_size (Optional[int]): Overrides `self.max_chunk_size` when truthy.
            max_overlapping_rate (Optional[float]): Overrides `self.max_overlapping_rate`
                when not None; an explicit `0.0` is honoured.
            **_: Ignored extra context.

        Returns:
            List[ArticleChunk]: The chunks produced by `ArticleChunk.from_file`.
        """
        # Test against None instead of using `or`: an explicit rate of 0.0 is falsy
        # and would otherwise be replaced by the instance default.
        overlapping_rate = max_overlapping_rate if max_overlapping_rate is not None else self.max_overlapping_rate
        return ArticleChunk.from_file(
            article_path,
            bib_manager,
            max_chunk_size=ok(max_chunk_size or self.max_chunk_size, "No max_chunk_size provided!"),
            # NOTE(review): assumes `ok` rejects only None, so 0.0 passes — confirm in `fabricatio.utils`.
            max_overlapping_rate=ok(overlapping_rate, "No max_overlapping_rate provided!"),
        )
|
@@ -1 +0,0 @@
|
|
1
|
-
"""A module containing some high level capabilities."""
|
@@ -1,20 +0,0 @@
|
|
1
|
-
"""The Capabilities module for advanced judging."""
|
2
|
-
|
3
|
-
from abc import ABC
|
4
|
-
from typing import Optional, Unpack
|
5
|
-
|
6
|
-
from fabricatio.capabilities.propose import Propose
|
7
|
-
from fabricatio.models.extra.advanced_judge import JudgeMent
|
8
|
-
from fabricatio.models.kwargs_types import ValidateKwargs
|
9
|
-
|
10
|
-
|
11
|
-
class AdvancedJudge(Propose, ABC):
    """Capability mixin that produces a `JudgeMent` from a prompt."""

    async def evidently_judge(
        self,
        prompt: str,
        **kwargs: Unpack[ValidateKwargs[JudgeMent]],
    ) -> Optional[JudgeMent]:
        """Propose a `JudgeMent` for the given prompt.

        Args:
            prompt (str): The text handed to `propose`.
            **kwargs: Forwarded unchanged to `propose`.

        Returns:
            Optional[JudgeMent]: Whatever `propose` returns for the `JudgeMent` model.
        """
        verdict = await self.propose(JudgeMent, prompt, **kwargs)
        return verdict
|
@@ -1,61 +0,0 @@
|
|
1
|
-
"""Advanced RAG (Retrieval Augmented Generation) model."""
|
2
|
-
|
3
|
-
from abc import ABC
|
4
|
-
from typing import Optional, Unpack
|
5
|
-
|
6
|
-
from fabricatio.capabilities.rag import RAG
|
7
|
-
from fabricatio.journal import logger
|
8
|
-
from fabricatio.models.adv_kwargs_types import FetchKwargs
|
9
|
-
from fabricatio.models.extra.aricle_rag import ArticleChunk, CitationManager
|
10
|
-
from fabricatio.models.kwargs_types import ChooseKwargs
|
11
|
-
from fabricatio.utils import fallback_kwargs
|
12
|
-
|
13
|
-
|
14
|
-
class AdvancedRAG(RAG, ABC):
    """A class representing the Advanced RAG (Retrieval Augmented Generation) model."""

    async def clued_search(
        self,
        requirement: str,
        cm: CitationManager,
        max_capacity: int = 40,
        max_round: int = 3,
        expand_multiplier: float = 1.4,
        base_accepted: int = 12,
        refinery_kwargs: Optional[ChooseKwargs] = None,
        **kwargs: Unpack[FetchKwargs],
    ) -> CitationManager:
        """Asynchronously performs a clued search based on a given requirement and citation manager.

        Each round refines a query from the references collected so far, retrieves
        new chunks and adds them to `cm`. The search stops when the capacity is
        used up, the query cannot be refined, or `max_round` rounds have run.

        Args:
            requirement (str): The requirement appended to every query-refinement prompt.
            cm (CitationManager): Receives the retrieved chunks; also supplies the filter expression.
            max_capacity (int): Upper bound on the number of chunks added across all rounds.
            max_round (int): Maximum number of rounds; must be greater than 0.
            expand_multiplier (float): Factor applied to `base_accepted` after each round.
            base_accepted (int): Accepted-result limit passed to `aretrieve` in the first round.
            refinery_kwargs (Optional[ChooseKwargs]): Keyword arguments for `arefined_query`.
            **kwargs: Extra fetch options forwarded to `aretrieve`.

        Returns:
            CitationManager: The same `cm` that was passed in.

        Raises:
            ValueError: If `max_round` is not greater than 0.
        """
        if max_round <= 0:
            raise ValueError("max_round should be greater than 0")
        if max_round == 1:
            logger.warning(
                "max_round should be greater than 1, otherwise it behaves nothing different from the `self.aretrieve`"
            )

        refinery_kwargs = refinery_kwargs or {}

        for i in range(1, max_round + 1):
            logger.info(f"Round [{i}/{max_round}] search started.")
            ref_q = await self.arefined_query(
                f"{cm.as_prompt()}\n\nAbove is the retrieved references in the {i - 1}th RAG, now we need to perform the {i}th RAG."
                f"\n\n{requirement}",
                **refinery_kwargs,
            )

            if ref_q is None:
                logger.error(f"At round [{i}/{max_round}] search, failed to refine the query, exit.")
                return cm
            refs = await self.aretrieve(
                ref_q, ArticleChunk, base_accepted, **fallback_kwargs(kwargs, filter_expr=cm.as_milvus_filter_expr())
            )

            if len(refs) >= max_capacity:
                # Keep only what still fits and stop: another round could not add
                # anything, so its query refinement and retrieval would be wasted.
                cm.add_chunks(refs[: max(max_capacity, 0)])
                logger.debug(f"At round [{i}/{max_round}] search, the capacity is not enough, exit.")
                return cm

            cm.add_chunks(refs)
            max_capacity -= len(refs)
            base_accepted = int(base_accepted * expand_multiplier)
        logger.debug(f"Exceeded max_round: {max_round}, exit.")
        return cm
|
@@ -1,105 +0,0 @@
|
|
1
|
-
"""Module for censoring objects and strings based on provided rulesets.
|
2
|
-
|
3
|
-
This module includes the Censor class which inherits from both Correct and Check classes.
|
4
|
-
It provides methods to censor objects and strings by first checking them against a ruleset and then correcting them if necessary.
|
5
|
-
"""
|
6
|
-
|
7
|
-
from abc import ABC
|
8
|
-
from typing import Optional, Unpack
|
9
|
-
|
10
|
-
from fabricatio.capabilities.check import Check
|
11
|
-
from fabricatio.capabilities.correct import Correct
|
12
|
-
from fabricatio.journal import logger
|
13
|
-
from fabricatio.models.extra.problem import Improvement
|
14
|
-
from fabricatio.models.extra.rule import RuleSet
|
15
|
-
from fabricatio.models.generic import ProposedUpdateAble, SketchedAble
|
16
|
-
from fabricatio.models.kwargs_types import ReferencedKwargs
|
17
|
-
from fabricatio.utils import override_kwargs
|
18
|
-
|
19
|
-
|
20
|
-
class Censor(Correct, Check, ABC):
|
21
|
-
"""Class to censor objects and strings based on provided rulesets.
|
22
|
-
|
23
|
-
Inherits from both Correct and Check classes.
|
24
|
-
Provides methods to censor objects and strings by first checking them against a ruleset and then correcting them if necessary.
|
25
|
-
|
26
|
-
"""
|
27
|
-
|
28
|
-
async def censor_obj[M: SketchedAble](
|
29
|
-
self, obj: M, ruleset: RuleSet, **kwargs: Unpack[ReferencedKwargs[M]]
|
30
|
-
) -> Optional[M]:
|
31
|
-
"""Censors an object based on the provided ruleset.
|
32
|
-
|
33
|
-
Args:
|
34
|
-
obj (M): The object to be censored.
|
35
|
-
ruleset (RuleSet): The ruleset to apply for censoring.
|
36
|
-
**kwargs: Additional keyword arguments to be passed to the check and correct methods.
|
37
|
-
|
38
|
-
Returns:
|
39
|
-
Optional[M]: The censored object if corrections were made, otherwise None.
|
40
|
-
|
41
|
-
Note:
|
42
|
-
This method first checks the object against the ruleset and then corrects it if necessary.
|
43
|
-
"""
|
44
|
-
imp = await self.check_obj(obj, ruleset, **override_kwargs(kwargs, default=None))
|
45
|
-
if imp is None:
|
46
|
-
return None
|
47
|
-
if not imp:
|
48
|
-
logger.info(f"No improvement found for `{obj.__class__.__name__}`.")
|
49
|
-
return obj
|
50
|
-
logger.info(f"Generated {len(imp)} improvement(s) for `{obj.__class__.__name__}")
|
51
|
-
return await self.correct_obj(obj, Improvement.gather(*imp), **kwargs)
|
52
|
-
|
53
|
-
async def censor_string(
|
54
|
-
self, input_text: str, ruleset: RuleSet, **kwargs: Unpack[ReferencedKwargs[str]]
|
55
|
-
) -> Optional[str]:
|
56
|
-
"""Censors a string based on the provided ruleset.
|
57
|
-
|
58
|
-
Args:
|
59
|
-
input_text (str): The string to be censored.
|
60
|
-
ruleset (RuleSet): The ruleset to apply for censoring.
|
61
|
-
**kwargs: Additional keyword arguments to be passed to the check and correct methods.
|
62
|
-
|
63
|
-
Returns:
|
64
|
-
Optional[str]: The censored string if corrections were made, otherwise None.
|
65
|
-
|
66
|
-
Note:
|
67
|
-
This method first checks the string against the ruleset and then corrects it if necessary.
|
68
|
-
"""
|
69
|
-
imp = await self.check_string(input_text, ruleset, **override_kwargs(kwargs, default=None))
|
70
|
-
if imp is None:
|
71
|
-
logger.warning(f"Censor failed for string:\n{input_text}")
|
72
|
-
return None
|
73
|
-
if not imp:
|
74
|
-
logger.info("No improvement found for string.")
|
75
|
-
return input_text
|
76
|
-
logger.info(f"Generated {len(imp)} improvement(s) for string.")
|
77
|
-
return await self.correct_string(input_text, Improvement.gather(*imp), **kwargs)
|
78
|
-
|
79
|
-
async def censor_obj_inplace[M: ProposedUpdateAble](
|
80
|
-
self, obj: M, ruleset: RuleSet, **kwargs: Unpack[ReferencedKwargs[M]]
|
81
|
-
) -> Optional[M]:
|
82
|
-
"""Censors an object in-place based on the provided ruleset.
|
83
|
-
|
84
|
-
This method modifies the object directly if corrections are needed.
|
85
|
-
|
86
|
-
Args:
|
87
|
-
obj (M): The object to be censored.
|
88
|
-
ruleset (RuleSet): The ruleset to apply for censoring.
|
89
|
-
**kwargs: Additional keyword arguments to be passed to the check and correct methods.
|
90
|
-
|
91
|
-
Returns:
|
92
|
-
Optional[M]: The censored object if corrections were made, otherwise None.
|
93
|
-
|
94
|
-
Note:
|
95
|
-
This method first checks the object against the ruleset and then corrects it in-place if necessary.
|
96
|
-
"""
|
97
|
-
imp = await self.check_obj(obj, ruleset, **override_kwargs(kwargs, default=None))
|
98
|
-
if imp is None:
|
99
|
-
logger.warning(f"Censor failed for `{obj.__class__.__name__}`")
|
100
|
-
return None
|
101
|
-
if not imp:
|
102
|
-
logger.info(f"No improvement found for `{obj.__class__.__name__}`.")
|
103
|
-
return obj
|
104
|
-
logger.info(f"Generated {len(imp)} improvement(s) for `{obj.__class__.__name__}")
|
105
|
-
return await self.correct_obj_inplace(obj, improvement=Improvement.gather(*imp), **kwargs)
|