fabricatio 0.2.9.dev4__cp312-cp312-manylinux_2_34_x86_64.whl → 0.2.10.dev1__cp312-cp312-manylinux_2_34_x86_64.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ from asyncio import gather
4
4
  from pathlib import Path
5
5
  from typing import Callable, List, Optional
6
6
 
7
+ from fabricatio.rust import BibManager, detect_language
7
8
  from more_itertools import filter_map
8
9
 
9
10
  from fabricatio.capabilities.censor import Censor
@@ -11,14 +12,12 @@ from fabricatio.capabilities.propose import Propose
11
12
  from fabricatio.fs import safe_text_read
12
13
  from fabricatio.journal import logger
13
14
  from fabricatio.models.action import Action
14
- from fabricatio.models.extra.article_base import SubSectionBase
15
15
  from fabricatio.models.extra.article_essence import ArticleEssence
16
16
  from fabricatio.models.extra.article_main import Article
17
17
  from fabricatio.models.extra.article_outline import ArticleOutline
18
18
  from fabricatio.models.extra.article_proposal import ArticleProposal
19
19
  from fabricatio.models.extra.rule import RuleSet
20
20
  from fabricatio.models.task import Task
21
- from fabricatio.rust import BibManager, detect_language
22
21
  from fabricatio.utils import ok
23
22
 
24
23
 
@@ -79,7 +78,7 @@ class FixArticleEssence(Action):
79
78
  out = []
80
79
  count = 0
81
80
  for a in article_essence:
82
- if key := (bib_mgr.get_cite_key(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
81
+ if key := (bib_mgr.get_cite_key_by_title(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
83
82
  a.title = bib_mgr.get_title_by_key(key) or a.title
84
83
  a.authors = bib_mgr.get_author_by_key(key) or a.authors
85
84
  a.publication_year = bib_mgr.get_year_by_key(key) or a.publication_year
@@ -142,11 +141,17 @@ class GenerateInitialOutline(Action, Propose):
142
141
  article_proposal: ArticleProposal,
143
142
  **_,
144
143
  ) -> Optional[ArticleOutline]:
144
+ raw_outline = await self.aask(
145
+ f"{(article_proposal.as_prompt())}\n\nNote that you should use `{article_proposal.language}` to write the `ArticleOutline`\n"
146
+ f"Design each chapter of a proper and academic and ready for release manner.\n"
147
+ f"You Must make sure every chapter have sections, and every section have subsections.\n"
148
+ f"Make the chapter and sections and subsections bing divided into a specific enough article component.",
149
+ )
150
+
145
151
  return ok(
146
152
  await self.propose(
147
153
  ArticleOutline,
148
- f"{(article_proposal.as_prompt())}\n\nNote that you should use `{article_proposal.language}` to write the `ArticleOutline`\n"
149
- f"You Must make sure every chapter have sections, and every section have subsections.",
154
+ f"{raw_outline}\n\n\n\noutline provided above is the outline i need to extract to a JSON,",
150
155
  ),
151
156
  "Could not generate the initial outline.",
152
157
  ).update_ref(article_proposal)
@@ -178,7 +183,7 @@ class FixIntrospectedErrors(Action, Censor):
178
183
  await self.censor_obj(
179
184
  article_outline,
180
185
  ruleset=ok(intro_fix_ruleset or self.ruleset, "No ruleset provided"),
181
- reference=f"{article_outline.as_prompt()}\n # Fatal Error of the Original Article Outline\n{pack}",
186
+ reference=f"{article_outline.display()}\n # Fatal Error of the Original Article Outline\n{pack}",
182
187
  ),
183
188
  "Could not correct the component.",
184
189
  ).update_ref(origin)
@@ -191,107 +196,6 @@ class FixIntrospectedErrors(Action, Censor):
191
196
  return article_outline
192
197
 
193
198
 
194
- class FixIllegalReferences(Action, Censor):
195
- """Fix illegal references in the article outline."""
196
-
197
- output_key: str = "illegal_references_fixed_outline"
198
- """The key of the output data."""
199
-
200
- ruleset: Optional[RuleSet] = None
201
- """Ruleset to use to fix the illegal references."""
202
- max_error_count: Optional[int] = None
203
- """The maximum number of errors to fix."""
204
-
205
- async def _execute(
206
- self,
207
- article_outline: ArticleOutline,
208
- ref_fix_ruleset: Optional[RuleSet] = None,
209
- **_,
210
- ) -> Optional[ArticleOutline]:
211
- counter = 0
212
- while pack := article_outline.find_illegal_ref(gather_identical=True):
213
- logger.info(f"Found {counter}th illegal references")
214
- ref_seq, err = ok(pack)
215
- logger.warning(f"Found illegal referring error: {err}")
216
- new = ok(
217
- await self.censor_obj(
218
- ref_seq[0],
219
- ruleset=ok(ref_fix_ruleset or self.ruleset, "No ruleset provided"),
220
- reference=f"{article_outline.as_prompt()}\n# Some Basic errors found that need to be fixed\n{err}",
221
- ),
222
- "Could not correct the component",
223
- )
224
- for r in ref_seq:
225
- r.update_from(new)
226
- if self.max_error_count and counter > self.max_error_count:
227
- logger.warning("Max error count reached, stopping.")
228
- break
229
- counter += 1
230
-
231
- return article_outline
232
-
233
-
234
- class TweakOutlineForwardRef(Action, Censor):
235
- """Tweak the forward references in the article outline.
236
-
237
- Ensures that the conclusions of the current chapter effectively support the analysis of subsequent chapters.
238
- """
239
-
240
- output_key: str = "article_outline_fw_ref_checked"
241
- ruleset: Optional[RuleSet] = None
242
- """Ruleset to use to fix the illegal references."""
243
-
244
- async def _execute(
245
- self, article_outline: ArticleOutline, ref_twk_ruleset: Optional[RuleSet] = None, **cxt
246
- ) -> ArticleOutline:
247
- return await self._inner(
248
- article_outline,
249
- ruleset=ok(ref_twk_ruleset or self.ruleset, "No ruleset provided"),
250
- field_name="support_to",
251
- )
252
-
253
- async def _inner(self, article_outline: ArticleOutline, ruleset: RuleSet, field_name: str) -> ArticleOutline:
254
- await gather(
255
- *[self._loop(a[-1], article_outline, field_name, ruleset) for a in article_outline.iter_subsections()],
256
- )
257
-
258
- return article_outline
259
-
260
- async def _loop(
261
- self, a: SubSectionBase, article_outline: ArticleOutline, field_name: str, ruleset: RuleSet
262
- ) -> None:
263
- if judge := await self.evidently_judge(
264
- f"{article_outline.as_prompt()}\n\n{a.display()}\n"
265
- f"Does the `{a.__class__.__name__}`'s `{field_name}` field need to be extended or tweaked?"
266
- ):
267
- await self.censor_obj_inplace(
268
- a,
269
- ruleset=ruleset,
270
- reference=f"{article_outline.as_prompt()}\n"
271
- f"The Article component titled `{a.title}` whose `{field_name}` field needs to be extended or tweaked.\n"
272
- f"# Judgement\n{judge.display()}",
273
- )
274
-
275
-
276
- class TweakOutlineBackwardRef(TweakOutlineForwardRef):
277
- """Tweak the backward references in the article outline.
278
-
279
- Ensures that the prerequisites of the current chapter are correctly referenced in the `depend_on` field.
280
- """
281
-
282
- output_key: str = "article_outline_bw_ref_checked"
283
- ruleset: Optional[RuleSet] = None
284
-
285
- async def _execute(
286
- self, article_outline: ArticleOutline, ref_twk_ruleset: Optional[RuleSet] = None, **cxt
287
- ) -> ArticleOutline:
288
- return await self._inner(
289
- article_outline,
290
- ruleset=ok(ref_twk_ruleset or self.ruleset, "No ruleset provided"),
291
- field_name="depend_on",
292
- )
293
-
294
-
295
199
  class GenerateArticle(Action, Censor):
296
200
  """Generate the article based on the outline."""
297
201
 
@@ -1,11 +1,15 @@
1
1
  """A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
2
2
 
3
3
  from asyncio import gather
4
- from typing import Optional
4
+ from pathlib import Path
5
+ from typing import List, Optional
5
6
 
7
+ from fabricatio import BibManager
6
8
  from fabricatio.capabilities.censor import Censor
7
9
  from fabricatio.capabilities.rag import RAG
8
10
  from fabricatio.models.action import Action
11
+ from fabricatio.models.extra.aricle_rag import ArticleChunk
12
+ from fabricatio.models.extra.article_essence import ArticleEssence
9
13
  from fabricatio.models.extra.article_main import Article, ArticleSubsection
10
14
  from fabricatio.models.extra.rule import RuleSet
11
15
  from fabricatio.utils import ok
@@ -97,9 +101,36 @@ class TweakArticleRAG(Action, RAG, Censor):
97
101
  await self.censor_obj_inplace(
98
102
  subsec,
99
103
  ruleset=ruleset,
100
- reference=f"{await self.aretrieve_compact(refind_q, final_limit=self.ref_limit)}\n\n"
104
+ reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, final_limit=self.ref_limit))}\n\n"
101
105
  f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
102
106
  f"You should Always use `{subsec.language}` as written language, "
103
107
  f"which is the original language of the `{subsec.title}`. "
104
108
  f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
105
109
  )
110
+
111
+
112
+ class ChunkArticle(Action):
113
+ """Chunk an article into smaller chunks."""
114
+
115
+ output_key:str = "article_chunks"
116
+ """The key used to store the output of the action."""
117
+ max_chunk_size: Optional[int] = None
118
+ """The maximum size of each chunk."""
119
+ max_overlapping_rate: Optional[float] = None
120
+ """The maximum overlapping rate between chunks."""
121
+
122
+ async def _execute(
123
+ self,
124
+ article_path: str | Path,
125
+ bib_manager: BibManager,
126
+ max_chunk_size: Optional[int] = None,
127
+ max_overlapping_rate: Optional[float] = None,
128
+ **_,
129
+ ) -> List[ArticleChunk]:
130
+ return ArticleChunk.from_file(
131
+ article_path,
132
+ bib_manager,
133
+ max_chunk_size=ok(max_chunk_size or self.max_chunk_size, "No max_chunk_size provided!"),
134
+ max_overlapping_rate=ok(max_overlapping_rate or self.max_overlapping_rate, "No max_overlapping_rate provided!"),
135
+ )
136
+
fabricatio/actions/rag.py CHANGED
@@ -5,34 +5,56 @@ from typing import List, Optional
5
5
  from questionary import text
6
6
 
7
7
  from fabricatio.capabilities.rag import RAG
8
+ from fabricatio.config import configs
8
9
  from fabricatio.journal import logger
9
10
  from fabricatio.models.action import Action
10
- from fabricatio.models.generic import Vectorizable
11
+ from fabricatio.models.extra.rag import MilvusClassicModel, MilvusDataBase
11
12
  from fabricatio.models.task import Task
13
+ from fabricatio.utils import ok
12
14
 
13
15
 
14
16
  class InjectToDB(Action, RAG):
15
17
  """Inject data into the database."""
16
18
 
17
19
  output_key: str = "collection_name"
20
+ collection_name: str = "my_collection"
21
+ """The name of the collection to inject data into."""
18
22
 
19
- async def _execute[T: Vectorizable](
20
- self, to_inject: Optional[T] | List[Optional[T]], collection_name: str = "my_collection",override_inject:bool=False, **_
23
+ async def _execute[T: MilvusDataBase](
24
+ self, to_inject: Optional[T] | List[Optional[T]], override_inject: bool = False, **_
21
25
  ) -> Optional[str]:
26
+ from pymilvus.milvus_client import IndexParams
27
+
28
+ if to_inject is None:
29
+ return None
22
30
  if not isinstance(to_inject, list):
23
31
  to_inject = [to_inject]
24
- logger.info(f"Injecting {len(to_inject)} items into the collection '{collection_name}'")
32
+ if not (seq := [t for t in to_inject if t is not None]): # filter out None
33
+ return None
34
+ logger.info(f"Injecting {len(seq)} items into the collection '{self.collection_name}'")
25
35
  if override_inject:
26
- self.check_client().client.drop_collection(collection_name)
27
- await self.view(collection_name, create=True).consume_string(
28
- [
29
- t.prepare_vectorization(self.embedding_max_sequence_length)
30
- for t in to_inject
31
- if isinstance(t, Vectorizable)
32
- ],
33
- )
34
-
35
- return collection_name
36
+ self.check_client().client.drop_collection(self.collection_name)
37
+
38
+ await self.view(
39
+ self.collection_name,
40
+ create=True,
41
+ schema=seq[0].as_milvus_schema(
42
+ ok(
43
+ self.milvus_dimensions
44
+ or configs.rag.milvus_dimensions
45
+ or self.embedding_dimensions
46
+ or configs.embedding.dimensions
47
+ ),
48
+ ),
49
+ index_params=IndexParams(
50
+ seq[0].vector_field_name,
51
+ index_name=seq[0].vector_field_name,
52
+ index_type=seq[0].index_type,
53
+ metric_type=seq[0].metric_type,
54
+ ),
55
+ ).add_document(seq, flush=True)
56
+
57
+ return self.collection_name
36
58
 
37
59
 
38
60
  class RAGTalk(Action, RAG):
@@ -62,10 +84,10 @@ class RAGTalk(Action, RAG):
62
84
  user_say = await text("User: ").ask_async()
63
85
  if user_say is None:
64
86
  break
65
- gpt_say = await self.aask_retrieved(
66
- user_say,
67
- user_say,
68
- extra_system_message=f"You have to answer to user obeying task assigned to you:\n{task_input.briefing}",
87
+ ret: List[MilvusClassicModel] = await self.aretrieve(user_say, document_model=MilvusClassicModel)
88
+
89
+ gpt_say = await self.aask(
90
+ user_say, system_message="\n".join(m.text for m in ret) + "\nYou can refer facts provided above."
69
91
  )
70
92
  print(f"GPT: {gpt_say}") # noqa: T201
71
93
  counter += 1
@@ -104,7 +104,8 @@ class Check(AdvancedJudge, Propose):
104
104
  - Proposes Improvement only when violation is confirmed
105
105
  """
106
106
  if judge := await self.evidently_judge(
107
- f"# Content to exam\n{input_text}\n\n# Rule Must to follow\n{rule.display()}\nDoes `Content to exam` provided above violate the `Rule Must to follow` provided above?",
107
+ f"# Content to exam\n{input_text}\n\n# Rule Must to follow\n{rule.display()}\nDoes `Content to exam` provided above violate the `{rule.name}` provided above?"
108
+ f"should I take some measure to fix that violation? true for I do need, false for I don't need.",
108
109
  **override_kwargs(kwargs, default=None),
109
110
  ):
110
111
  logger.info(f"Rule `{rule.name}` violated: \n{judge.display()}")