fabricatio 0.2.9.dev4__cp312-cp312-win_amd64.whl → 0.2.10.dev1__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/actions/article.py +11 -107
- fabricatio/actions/article_rag.py +33 -2
- fabricatio/actions/rag.py +40 -18
- fabricatio/capabilities/check.py +2 -1
- fabricatio/capabilities/rag.py +41 -231
- fabricatio/constants.py +20 -0
- fabricatio/decorators.py +23 -0
- fabricatio/models/adv_kwargs_types.py +35 -0
- fabricatio/models/events.py +6 -6
- fabricatio/models/extra/advanced_judge.py +2 -2
- fabricatio/models/extra/aricle_rag.py +120 -0
- fabricatio/models/extra/article_base.py +2 -186
- fabricatio/models/extra/article_essence.py +8 -7
- fabricatio/models/extra/article_main.py +12 -107
- fabricatio/models/extra/problem.py +12 -17
- fabricatio/models/extra/rag.py +98 -0
- fabricatio/models/extra/rule.py +1 -2
- fabricatio/models/generic.py +19 -11
- fabricatio/models/kwargs_types.py +6 -36
- fabricatio/models/task.py +3 -3
- fabricatio/models/usages.py +73 -5
- fabricatio/rust.cp312-win_amd64.pyd +0 -0
- fabricatio/rust.pyi +35 -6
- fabricatio/utils.py +14 -1
- {fabricatio-0.2.9.dev4.data → fabricatio-0.2.10.dev1.data}/scripts/tdown.exe +0 -0
- {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dev1.dist-info}/METADATA +1 -4
- {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dev1.dist-info}/RECORD +29 -27
- fabricatio/models/utils.py +0 -148
- {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dev1.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dev1.dist-info}/licenses/LICENSE +0 -0
fabricatio/actions/article.py
CHANGED
@@ -4,6 +4,7 @@ from asyncio import gather
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import Callable, List, Optional
|
6
6
|
|
7
|
+
from fabricatio.rust import BibManager, detect_language
|
7
8
|
from more_itertools import filter_map
|
8
9
|
|
9
10
|
from fabricatio.capabilities.censor import Censor
|
@@ -11,14 +12,12 @@ from fabricatio.capabilities.propose import Propose
|
|
11
12
|
from fabricatio.fs import safe_text_read
|
12
13
|
from fabricatio.journal import logger
|
13
14
|
from fabricatio.models.action import Action
|
14
|
-
from fabricatio.models.extra.article_base import SubSectionBase
|
15
15
|
from fabricatio.models.extra.article_essence import ArticleEssence
|
16
16
|
from fabricatio.models.extra.article_main import Article
|
17
17
|
from fabricatio.models.extra.article_outline import ArticleOutline
|
18
18
|
from fabricatio.models.extra.article_proposal import ArticleProposal
|
19
19
|
from fabricatio.models.extra.rule import RuleSet
|
20
20
|
from fabricatio.models.task import Task
|
21
|
-
from fabricatio.rust import BibManager, detect_language
|
22
21
|
from fabricatio.utils import ok
|
23
22
|
|
24
23
|
|
@@ -79,7 +78,7 @@ class FixArticleEssence(Action):
|
|
79
78
|
out = []
|
80
79
|
count = 0
|
81
80
|
for a in article_essence:
|
82
|
-
if key := (bib_mgr.
|
81
|
+
if key := (bib_mgr.get_cite_key_by_title(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
|
83
82
|
a.title = bib_mgr.get_title_by_key(key) or a.title
|
84
83
|
a.authors = bib_mgr.get_author_by_key(key) or a.authors
|
85
84
|
a.publication_year = bib_mgr.get_year_by_key(key) or a.publication_year
|
@@ -142,11 +141,17 @@ class GenerateInitialOutline(Action, Propose):
|
|
142
141
|
article_proposal: ArticleProposal,
|
143
142
|
**_,
|
144
143
|
) -> Optional[ArticleOutline]:
|
144
|
+
raw_outline = await self.aask(
|
145
|
+
f"{(article_proposal.as_prompt())}\n\nNote that you should use `{article_proposal.language}` to write the `ArticleOutline`\n"
|
146
|
+
f"Design each chapter of a proper and academic and ready for release manner.\n"
|
147
|
+
f"You Must make sure every chapter have sections, and every section have subsections.\n"
|
148
|
+
f"Make the chapter and sections and subsections bing divided into a specific enough article component.",
|
149
|
+
)
|
150
|
+
|
145
151
|
return ok(
|
146
152
|
await self.propose(
|
147
153
|
ArticleOutline,
|
148
|
-
f"{
|
149
|
-
f"You Must make sure every chapter have sections, and every section have subsections.",
|
154
|
+
f"{raw_outline}\n\n\n\noutline provided above is the outline i need to extract to a JSON,",
|
150
155
|
),
|
151
156
|
"Could not generate the initial outline.",
|
152
157
|
).update_ref(article_proposal)
|
@@ -178,7 +183,7 @@ class FixIntrospectedErrors(Action, Censor):
|
|
178
183
|
await self.censor_obj(
|
179
184
|
article_outline,
|
180
185
|
ruleset=ok(intro_fix_ruleset or self.ruleset, "No ruleset provided"),
|
181
|
-
reference=f"{article_outline.
|
186
|
+
reference=f"{article_outline.display()}\n # Fatal Error of the Original Article Outline\n{pack}",
|
182
187
|
),
|
183
188
|
"Could not correct the component.",
|
184
189
|
).update_ref(origin)
|
@@ -191,107 +196,6 @@ class FixIntrospectedErrors(Action, Censor):
|
|
191
196
|
return article_outline
|
192
197
|
|
193
198
|
|
194
|
-
class FixIllegalReferences(Action, Censor):
|
195
|
-
"""Fix illegal references in the article outline."""
|
196
|
-
|
197
|
-
output_key: str = "illegal_references_fixed_outline"
|
198
|
-
"""The key of the output data."""
|
199
|
-
|
200
|
-
ruleset: Optional[RuleSet] = None
|
201
|
-
"""Ruleset to use to fix the illegal references."""
|
202
|
-
max_error_count: Optional[int] = None
|
203
|
-
"""The maximum number of errors to fix."""
|
204
|
-
|
205
|
-
async def _execute(
|
206
|
-
self,
|
207
|
-
article_outline: ArticleOutline,
|
208
|
-
ref_fix_ruleset: Optional[RuleSet] = None,
|
209
|
-
**_,
|
210
|
-
) -> Optional[ArticleOutline]:
|
211
|
-
counter = 0
|
212
|
-
while pack := article_outline.find_illegal_ref(gather_identical=True):
|
213
|
-
logger.info(f"Found {counter}th illegal references")
|
214
|
-
ref_seq, err = ok(pack)
|
215
|
-
logger.warning(f"Found illegal referring error: {err}")
|
216
|
-
new = ok(
|
217
|
-
await self.censor_obj(
|
218
|
-
ref_seq[0],
|
219
|
-
ruleset=ok(ref_fix_ruleset or self.ruleset, "No ruleset provided"),
|
220
|
-
reference=f"{article_outline.as_prompt()}\n# Some Basic errors found that need to be fixed\n{err}",
|
221
|
-
),
|
222
|
-
"Could not correct the component",
|
223
|
-
)
|
224
|
-
for r in ref_seq:
|
225
|
-
r.update_from(new)
|
226
|
-
if self.max_error_count and counter > self.max_error_count:
|
227
|
-
logger.warning("Max error count reached, stopping.")
|
228
|
-
break
|
229
|
-
counter += 1
|
230
|
-
|
231
|
-
return article_outline
|
232
|
-
|
233
|
-
|
234
|
-
class TweakOutlineForwardRef(Action, Censor):
|
235
|
-
"""Tweak the forward references in the article outline.
|
236
|
-
|
237
|
-
Ensures that the conclusions of the current chapter effectively support the analysis of subsequent chapters.
|
238
|
-
"""
|
239
|
-
|
240
|
-
output_key: str = "article_outline_fw_ref_checked"
|
241
|
-
ruleset: Optional[RuleSet] = None
|
242
|
-
"""Ruleset to use to fix the illegal references."""
|
243
|
-
|
244
|
-
async def _execute(
|
245
|
-
self, article_outline: ArticleOutline, ref_twk_ruleset: Optional[RuleSet] = None, **cxt
|
246
|
-
) -> ArticleOutline:
|
247
|
-
return await self._inner(
|
248
|
-
article_outline,
|
249
|
-
ruleset=ok(ref_twk_ruleset or self.ruleset, "No ruleset provided"),
|
250
|
-
field_name="support_to",
|
251
|
-
)
|
252
|
-
|
253
|
-
async def _inner(self, article_outline: ArticleOutline, ruleset: RuleSet, field_name: str) -> ArticleOutline:
|
254
|
-
await gather(
|
255
|
-
*[self._loop(a[-1], article_outline, field_name, ruleset) for a in article_outline.iter_subsections()],
|
256
|
-
)
|
257
|
-
|
258
|
-
return article_outline
|
259
|
-
|
260
|
-
async def _loop(
|
261
|
-
self, a: SubSectionBase, article_outline: ArticleOutline, field_name: str, ruleset: RuleSet
|
262
|
-
) -> None:
|
263
|
-
if judge := await self.evidently_judge(
|
264
|
-
f"{article_outline.as_prompt()}\n\n{a.display()}\n"
|
265
|
-
f"Does the `{a.__class__.__name__}`'s `{field_name}` field need to be extended or tweaked?"
|
266
|
-
):
|
267
|
-
await self.censor_obj_inplace(
|
268
|
-
a,
|
269
|
-
ruleset=ruleset,
|
270
|
-
reference=f"{article_outline.as_prompt()}\n"
|
271
|
-
f"The Article component titled `{a.title}` whose `{field_name}` field needs to be extended or tweaked.\n"
|
272
|
-
f"# Judgement\n{judge.display()}",
|
273
|
-
)
|
274
|
-
|
275
|
-
|
276
|
-
class TweakOutlineBackwardRef(TweakOutlineForwardRef):
|
277
|
-
"""Tweak the backward references in the article outline.
|
278
|
-
|
279
|
-
Ensures that the prerequisites of the current chapter are correctly referenced in the `depend_on` field.
|
280
|
-
"""
|
281
|
-
|
282
|
-
output_key: str = "article_outline_bw_ref_checked"
|
283
|
-
ruleset: Optional[RuleSet] = None
|
284
|
-
|
285
|
-
async def _execute(
|
286
|
-
self, article_outline: ArticleOutline, ref_twk_ruleset: Optional[RuleSet] = None, **cxt
|
287
|
-
) -> ArticleOutline:
|
288
|
-
return await self._inner(
|
289
|
-
article_outline,
|
290
|
-
ruleset=ok(ref_twk_ruleset or self.ruleset, "No ruleset provided"),
|
291
|
-
field_name="depend_on",
|
292
|
-
)
|
293
|
-
|
294
|
-
|
295
199
|
class GenerateArticle(Action, Censor):
|
296
200
|
"""Generate the article based on the outline."""
|
297
201
|
|
fabricatio/actions/article_rag.py
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
"""A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
|
2
2
|
|
3
3
|
from asyncio import gather
|
4
|
-
from
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import List, Optional
|
5
6
|
|
7
|
+
from fabricatio import BibManager
|
6
8
|
from fabricatio.capabilities.censor import Censor
|
7
9
|
from fabricatio.capabilities.rag import RAG
|
8
10
|
from fabricatio.models.action import Action
|
11
|
+
from fabricatio.models.extra.aricle_rag import ArticleChunk
|
12
|
+
from fabricatio.models.extra.article_essence import ArticleEssence
|
9
13
|
from fabricatio.models.extra.article_main import Article, ArticleSubsection
|
10
14
|
from fabricatio.models.extra.rule import RuleSet
|
11
15
|
from fabricatio.utils import ok
|
@@ -97,9 +101,36 @@ class TweakArticleRAG(Action, RAG, Censor):
|
|
97
101
|
await self.censor_obj_inplace(
|
98
102
|
subsec,
|
99
103
|
ruleset=ruleset,
|
100
|
-
reference=f"{await self.
|
104
|
+
reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, final_limit=self.ref_limit))}\n\n"
|
101
105
|
f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
|
102
106
|
f"You should Always use `{subsec.language}` as written language, "
|
103
107
|
f"which is the original language of the `{subsec.title}`. "
|
104
108
|
f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
|
105
109
|
)
|
110
|
+
|
111
|
+
|
112
|
+
class ChunkArticle(Action):
|
113
|
+
"""Chunk an article into smaller chunks."""
|
114
|
+
|
115
|
+
output_key:str = "article_chunks"
|
116
|
+
"""The key used to store the output of the action."""
|
117
|
+
max_chunk_size: Optional[int] = None
|
118
|
+
"""The maximum size of each chunk."""
|
119
|
+
max_overlapping_rate: Optional[float] = None
|
120
|
+
"""The maximum overlapping rate between chunks."""
|
121
|
+
|
122
|
+
async def _execute(
|
123
|
+
self,
|
124
|
+
article_path: str | Path,
|
125
|
+
bib_manager: BibManager,
|
126
|
+
max_chunk_size: Optional[int] = None,
|
127
|
+
max_overlapping_rate: Optional[float] = None,
|
128
|
+
**_,
|
129
|
+
) -> List[ArticleChunk]:
|
130
|
+
return ArticleChunk.from_file(
|
131
|
+
article_path,
|
132
|
+
bib_manager,
|
133
|
+
max_chunk_size=ok(max_chunk_size or self.max_chunk_size, "No max_chunk_size provided!"),
|
134
|
+
max_overlapping_rate=ok(max_overlapping_rate or self.max_overlapping_rate, "No max_overlapping_rate provided!"),
|
135
|
+
)
|
136
|
+
|
fabricatio/actions/rag.py
CHANGED
@@ -5,34 +5,56 @@ from typing import List, Optional
|
|
5
5
|
from questionary import text
|
6
6
|
|
7
7
|
from fabricatio.capabilities.rag import RAG
|
8
|
+
from fabricatio.config import configs
|
8
9
|
from fabricatio.journal import logger
|
9
10
|
from fabricatio.models.action import Action
|
10
|
-
from fabricatio.models.
|
11
|
+
from fabricatio.models.extra.rag import MilvusClassicModel, MilvusDataBase
|
11
12
|
from fabricatio.models.task import Task
|
13
|
+
from fabricatio.utils import ok
|
12
14
|
|
13
15
|
|
14
16
|
class InjectToDB(Action, RAG):
|
15
17
|
"""Inject data into the database."""
|
16
18
|
|
17
19
|
output_key: str = "collection_name"
|
20
|
+
collection_name: str = "my_collection"
|
21
|
+
"""The name of the collection to inject data into."""
|
18
22
|
|
19
|
-
async def _execute[T:
|
20
|
-
self, to_inject: Optional[T] | List[Optional[T]],
|
23
|
+
async def _execute[T: MilvusDataBase](
|
24
|
+
self, to_inject: Optional[T] | List[Optional[T]], override_inject: bool = False, **_
|
21
25
|
) -> Optional[str]:
|
26
|
+
from pymilvus.milvus_client import IndexParams
|
27
|
+
|
28
|
+
if to_inject is None:
|
29
|
+
return None
|
22
30
|
if not isinstance(to_inject, list):
|
23
31
|
to_inject = [to_inject]
|
24
|
-
|
32
|
+
if not (seq := [t for t in to_inject if t is not None]): # filter out None
|
33
|
+
return None
|
34
|
+
logger.info(f"Injecting {len(seq)} items into the collection '{self.collection_name}'")
|
25
35
|
if override_inject:
|
26
|
-
self.check_client().client.drop_collection(collection_name)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
+
self.check_client().client.drop_collection(self.collection_name)
|
37
|
+
|
38
|
+
await self.view(
|
39
|
+
self.collection_name,
|
40
|
+
create=True,
|
41
|
+
schema=seq[0].as_milvus_schema(
|
42
|
+
ok(
|
43
|
+
self.milvus_dimensions
|
44
|
+
or configs.rag.milvus_dimensions
|
45
|
+
or self.embedding_dimensions
|
46
|
+
or configs.embedding.dimensions
|
47
|
+
),
|
48
|
+
),
|
49
|
+
index_params=IndexParams(
|
50
|
+
seq[0].vector_field_name,
|
51
|
+
index_name=seq[0].vector_field_name,
|
52
|
+
index_type=seq[0].index_type,
|
53
|
+
metric_type=seq[0].metric_type,
|
54
|
+
),
|
55
|
+
).add_document(seq, flush=True)
|
56
|
+
|
57
|
+
return self.collection_name
|
36
58
|
|
37
59
|
|
38
60
|
class RAGTalk(Action, RAG):
|
@@ -62,10 +84,10 @@ class RAGTalk(Action, RAG):
|
|
62
84
|
user_say = await text("User: ").ask_async()
|
63
85
|
if user_say is None:
|
64
86
|
break
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
87
|
+
ret: List[MilvusClassicModel] = await self.aretrieve(user_say, document_model=MilvusClassicModel)
|
88
|
+
|
89
|
+
gpt_say = await self.aask(
|
90
|
+
user_say, system_message="\n".join(m.text for m in ret) + "\nYou can refer facts provided above."
|
69
91
|
)
|
70
92
|
print(f"GPT: {gpt_say}") # noqa: T201
|
71
93
|
counter += 1
|
fabricatio/capabilities/check.py
CHANGED
@@ -104,7 +104,8 @@ class Check(AdvancedJudge, Propose):
|
|
104
104
|
- Proposes Improvement only when violation is confirmed
|
105
105
|
"""
|
106
106
|
if judge := await self.evidently_judge(
|
107
|
-
f"# Content to exam\n{input_text}\n\n# Rule Must to follow\n{rule.display()}\nDoes `Content to exam` provided above violate the `
|
107
|
+
f"# Content to exam\n{input_text}\n\n# Rule Must to follow\n{rule.display()}\nDoes `Content to exam` provided above violate the `{rule.name}` provided above?"
|
108
|
+
f"should I take some measure to fix that violation? true for I do need, false for I don't need.",
|
108
109
|
**override_kwargs(kwargs, default=None),
|
109
110
|
):
|
110
111
|
logger.info(f"Rule `{rule.name}` violated: \n{judge.display()}")
|