fabricatio 0.3.15.dev4__cp312-cp312-win_amd64.whl → 0.4.4__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. fabricatio/__init__.py +7 -8
  2. fabricatio/actions/__init__.py +69 -1
  3. fabricatio/capabilities/__init__.py +59 -1
  4. fabricatio/models/__init__.py +47 -0
  5. fabricatio/rust.cp312-win_amd64.pyd +0 -0
  6. fabricatio/toolboxes/__init__.py +2 -1
  7. fabricatio/toolboxes/arithmetic.py +1 -1
  8. fabricatio/toolboxes/fs.py +2 -2
  9. fabricatio/workflows/__init__.py +9 -0
  10. fabricatio-0.4.4.data/scripts/tdown.exe +0 -0
  11. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.4.dist-info}/METADATA +49 -25
  12. fabricatio-0.4.4.dist-info/RECORD +15 -0
  13. fabricatio/actions/article.py +0 -415
  14. fabricatio/actions/article_rag.py +0 -407
  15. fabricatio/actions/fs.py +0 -25
  16. fabricatio/actions/output.py +0 -248
  17. fabricatio/actions/rag.py +0 -96
  18. fabricatio/actions/rules.py +0 -83
  19. fabricatio/capabilities/advanced_judge.py +0 -20
  20. fabricatio/capabilities/advanced_rag.py +0 -61
  21. fabricatio/capabilities/censor.py +0 -105
  22. fabricatio/capabilities/check.py +0 -212
  23. fabricatio/capabilities/correct.py +0 -228
  24. fabricatio/capabilities/extract.py +0 -74
  25. fabricatio/capabilities/persist.py +0 -103
  26. fabricatio/capabilities/propose.py +0 -65
  27. fabricatio/capabilities/rag.py +0 -264
  28. fabricatio/capabilities/rating.py +0 -404
  29. fabricatio/capabilities/review.py +0 -114
  30. fabricatio/capabilities/task.py +0 -113
  31. fabricatio/decorators.py +0 -253
  32. fabricatio/emitter.py +0 -177
  33. fabricatio/fs/__init__.py +0 -35
  34. fabricatio/fs/curd.py +0 -153
  35. fabricatio/fs/readers.py +0 -61
  36. fabricatio/journal.py +0 -12
  37. fabricatio/models/action.py +0 -263
  38. fabricatio/models/adv_kwargs_types.py +0 -63
  39. fabricatio/models/extra/__init__.py +0 -1
  40. fabricatio/models/extra/advanced_judge.py +0 -32
  41. fabricatio/models/extra/aricle_rag.py +0 -286
  42. fabricatio/models/extra/article_base.py +0 -486
  43. fabricatio/models/extra/article_essence.py +0 -101
  44. fabricatio/models/extra/article_main.py +0 -286
  45. fabricatio/models/extra/article_outline.py +0 -46
  46. fabricatio/models/extra/article_proposal.py +0 -52
  47. fabricatio/models/extra/patches.py +0 -20
  48. fabricatio/models/extra/problem.py +0 -165
  49. fabricatio/models/extra/rag.py +0 -98
  50. fabricatio/models/extra/rule.py +0 -52
  51. fabricatio/models/generic.py +0 -812
  52. fabricatio/models/kwargs_types.py +0 -121
  53. fabricatio/models/role.py +0 -99
  54. fabricatio/models/task.py +0 -310
  55. fabricatio/models/tool.py +0 -328
  56. fabricatio/models/usages.py +0 -791
  57. fabricatio/parser.py +0 -114
  58. fabricatio/rust.pyi +0 -846
  59. fabricatio/utils.py +0 -156
  60. fabricatio/workflows/articles.py +0 -24
  61. fabricatio/workflows/rag.py +0 -11
  62. fabricatio-0.3.15.dev4.data/scripts/tdown.exe +0 -0
  63. fabricatio-0.3.15.dev4.data/scripts/ttm.exe +0 -0
  64. fabricatio-0.3.15.dev4.dist-info/RECORD +0 -64
  65. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.4.dist-info}/WHEEL +0 -0
  66. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,407 +0,0 @@
1
- """A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
2
-
3
- from asyncio import gather
4
- from pathlib import Path
5
- from typing import ClassVar, List, Optional
6
-
7
- from pydantic import Field, PositiveInt
8
-
9
- from fabricatio.capabilities.advanced_rag import AdvancedRAG
10
- from fabricatio.capabilities.censor import Censor
11
- from fabricatio.capabilities.extract import Extract
12
- from fabricatio.capabilities.rag import RAG
13
- from fabricatio.decorators import precheck_package
14
- from fabricatio.journal import logger
15
- from fabricatio.models.action import Action
16
- from fabricatio.models.extra.aricle_rag import ArticleChunk, CitationManager
17
- from fabricatio.models.extra.article_essence import ArticleEssence
18
- from fabricatio.models.extra.article_main import Article, ArticleChapter, ArticleSection, ArticleSubsection
19
- from fabricatio.models.extra.article_outline import ArticleOutline
20
- from fabricatio.models.extra.rule import RuleSet
21
- from fabricatio.models.kwargs_types import ChooseKwargs, LLMKwargs
22
- from fabricatio.rust import (
23
- BibManager,
24
- convert_all_tex_math,
25
- fix_misplaced_labels,
26
- )
27
- from fabricatio.utils import ok
28
-
29
# Prompt fragment that tells the model which citation syntax is accepted.
TYPST_CITE_USAGE = (
    "citation number is REQUIRED to cite any reference!'\n"
    "Legal citing syntax examples(seperated by |): [[1]]|[[1,2]]|[[1-3]]|[[12,13-15]]|[[1-3,5-7]]\n"
    "Illegal citing syntax examples(seperated by |): [[1],[2],[3]]|[[1],[1-2]]\n"
    # Fix: the trailing "\n" was missing, so this sentence was glued to the next one.
    "You SHALL not cite a single reference more than once!\n"
    "It's recommended to cite multiple references that supports your conclusion at a time.\n"
)

# Prompt fragment that tells the model how to write inline/block math and equation labels.
TYPST_MATH_USAGE = (
    "Wrap inline expression with '\\(' and '\\)',like '\\(>5m\\)' '\\(89%\\)', and wrap block equation with '\\[' and '\\]'.\n"
    "In addition to that, you can add a label outside the block equation which can be used as a cross reference identifier, the label is a string wrapped in `<` and `>` like `<energy-release-rate-equation>`.Note that the label string should be a summarizing title for the equation being labeled and should never be written within the formula block.\n"
    # Fix: the trailing "\n" was missing, so "plaintext." ran straight into "Below is ...".
    "you can refer to that label by using the syntax with prefix of `@eqt:`, which indicate that this notation is citing a label from the equations. For example ' @eqt:energy-release-rate-equation ' DO remember that the notation shall have both suffixed and prefixed space char which enable the compiler to distinguish the notation from the plaintext.\n"
    "Below is two usage example:\n"
    "```typst\n"
    "See @eqt:mass-energy-equation , it's the foundation of physics.\n"
    "\\[\n"
    "E = m c^2\n"
    "\\] <mass-energy-equation>\n\n\n"
    "In @eqt:mass-energy-equation , \\(m\\) stands for mass, \\(c\\) stands for speed of light, and \\(E\\) stands for energy. \n"
    "```\n"
)
50
-
51
-
52
class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
    """Write the content of every subsection of an article from its outline.

    For each subsection the action searches the target collection for references,
    asks the model to write the raw paragraphs, and extracts them into an
    `ArticleSubsection` that is merged back into the article.
    """

    ctx_override: ClassVar[bool] = True
    search_increment_multiplier: float = 1.6
    """The increment multiplier of the search increment."""
    ref_limit: int = 35
    """The limit of references to be retrieved"""
    threshold: float = 0.62
    """The threshold of relevance"""
    extractor_model: LLMKwargs
    """The model to use for extracting the content from the retrieved references."""
    query_model: ChooseKwargs
    """The model to use for querying the database"""
    supervisor: bool = False
    """Whether to use supervisor mode"""
    result_per_query: PositiveInt = 4
    """The number of results to be returned per query."""
    cite_req: str = TYPST_CITE_USAGE
    """The req of the write article content."""

    math_req: str = TYPST_MATH_USAGE
    """The req of the write article content."""
    # Passed through unchanged as `tei_endpoint` to `clued_search`.
    tei_endpoint: Optional[str] = None

    async def _execute(
        self,
        article_outline: ArticleOutline,
        collection_name: Optional[str] = None,
        supervisor: Optional[bool] = None,
        **cxt,
    ) -> Article:
        """Build an article from the outline and write all of its subsections.

        Args:
            article_outline: The outline the article is created from.
            collection_name: Collection to search; falls back to `self.safe_target_collection`.
            supervisor: Overrides `self.supervisor` when not None.
            **cxt: Remaining context, unused here.

        Returns:
            The result of `article.convert_tex()` after all subsections were processed.
        """
        article = Article.from_outline(article_outline).update_ref(article_outline)
        self.target_collection = collection_name or self.safe_target_collection
        if supervisor or (supervisor is None and self.supervisor):
            # Supervisor mode is interactive, so subsections are handled one at a time.
            for chap, sec, subsec in article.iter_subsections():
                await self._supervisor_inner(article, article_outline, chap, sec, subsec)

        else:
            await gather(
                *[
                    self._inner(article, article_outline, chap, sec, subsec)
                    for chap, sec, subsec in article.iter_subsections()
                ]
            )
        return article.convert_tex()

    @precheck_package(
        "questionary", "`questionary` is required for supervisor mode, please install it by `fabricatio[qa]`"
    )
    async def _supervisor_inner(
        self,
        article: Article,
        article_outline: ArticleOutline,
        chap: ArticleChapter,
        sec: ArticleSection,
        subsec: ArticleSubsection,
    ) -> ArticleSubsection:
        """Write one subsection interactively, looping until the user accepts the draft."""
        from questionary import confirm, text
        from rich import print as r_print

        cm = CitationManager()
        await self.search_database(article, article_outline, chap, sec, subsec, cm)

        raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm)
        r_print(raw_paras)

        # NOTE(review): the loop nesting below was reconstructed from flattened text;
        # confirm that the label/math fix-ups belong inside the loop.
        while not await confirm("Accept this version and continue?").ask_async():
            if inst := await text("Search for more refs for additional spec.").ask_async():
                await self.search_database(article, article_outline, chap, sec, subsec, cm, extra_instruction=inst)

            if instruction := await text("Enter the instructions to improve").ask_async():
                raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm, instruction)
            if edt := await text("Edit", default=raw_paras).ask_async():
                raw_paras = edt

            raw_paras = fix_misplaced_labels(raw_paras)
            raw_paras = convert_all_tex_math(raw_paras)

            r_print(raw_paras)

        return await self.extract_new_subsec(subsec, raw_paras, cm)

    async def _inner(
        self,
        article: Article,
        article_outline: ArticleOutline,
        chap: ArticleChapter,
        sec: ArticleSection,
        subsec: ArticleSubsection,
    ) -> ArticleSubsection:
        """Write one subsection without user interaction."""
        cm = CitationManager()

        await self.search_database(article, article_outline, chap, sec, subsec, cm)

        raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm)

        # Drop empty lines, lines ending with "**" and lines starting with "#".
        raw_paras = "\n".join(p for p in raw_paras.splitlines() if p and not p.endswith("**") and not p.startswith("#"))

        raw_paras = fix_misplaced_labels(raw_paras)
        raw_paras = convert_all_tex_math(raw_paras)

        return await self.extract_new_subsec(subsec, raw_paras, cm)

    async def extract_new_subsec(
        self, subsec: ArticleSubsection, raw_paras: str, cm: CitationManager
    ) -> ArticleSubsection:
        """Extract a subsection from the raw paragraphs and merge it into `subsec`.

        Args:
            subsec: The subsection that is updated in place.
            raw_paras: The raw text written for the subsection.
            cm: Citation manager applied to every paragraph's content and description.

        Returns:
            The same `subsec` object after `update_from` was called on it.
        """
        new_subsec = ok(
            await self.extract(
                ArticleSubsection,
                raw_paras,
                f"Above is the subsection titled `{subsec.title}`.\n"
                # Fix: `ArticleSubsection` is already the class, so `.__class__.__name__`
                # produced the metaclass name instead of "ArticleSubsection".
                f"I need you to extract the content to construct a new `{ArticleSubsection.__name__}`,"
                f"Do not attempt to change the original content, your job is ONLY content extraction",
                **self.extractor_model,
            ),
            "Failed to propose new subsection.",
        )

        for p in new_subsec.paragraphs:
            p.content = cm.apply(p.content)
            p.description = cm.apply(p.description)
        subsec.update_from(new_subsec)
        logger.debug(f"{subsec.title}:rpl\n{subsec.display()}")
        return subsec

    async def write_raw(
        self,
        article: Article,
        article_outline: ArticleOutline,
        chap: ArticleChapter,
        sec: ArticleSection,
        subsec: ArticleSubsection,
        cm: CitationManager,
        extra_instruction: str = "",
    ) -> str:
        """Ask the model to write the raw paragraphs of the subsection.

        The prompt is made of the references in `cm`, the outline dump, the location of
        the subsection, the citation and math requirements and `extra_instruction`.
        """
        return await self.aask(
            # Fix: a "\n" now separates this sentence from the outline dump that follows.
            f"{cm.as_prompt()}\nAbove is some related reference from other auther retrieved for you.\n"
            f"{article_outline.finalized_dump()}\n\nAbove is my article outline, I m writing graduate thesis titled `{article.title}`. "
            f"More specifically, i m witting the Chapter `{chap.title}` >> Section `{sec.title}` >> Subsection `{subsec.title}`.\n"
            f"Please help me write the paragraphs of the subsec mentioned above, which is `{subsec.title}`.\n"
            f"{self.cite_req}\n{self.math_req}\n"
            f"You SHALL use `{article.language}` as writing language.\n{extra_instruction}\n"
            f"Do not use numbered list to display the outcome, you should regard you are writing the main text of the thesis.\n"
            f"You should not copy others' works from the references directly on to my thesis, we can only harness the conclusion they have drawn.\n"
            f"No extra explanation is allowed."
        )

    async def search_database(
        self,
        article: Article,
        article_outline: ArticleOutline,
        chap: ArticleChapter,
        sec: ArticleSection,
        subsec: ArticleSubsection,
        cm: CitationManager,
        extra_instruction: str = "",
    ) -> None:
        """Search the database for references related to the subsection.

        The search request is handed to `clued_search` together with `cm` and the
        search settings configured on this action.
        """
        search_req = (
            f"{article_outline.finalized_dump()}\n\nAbove is my article outline, I m writing graduate thesis titled `{article.title}`. "
            f"More specifically, i m witting the Chapter `{chap.title}` >> Section `{sec.title}` >> Subsection `{subsec.title}`.\n"
            f"I need to search related references to build up the content of the subsec mentioned above, which is `{subsec.title}`.\n"
            f"provide 10~16 queries as possible, to get best result!\n"
            f"You should provide both English version and chinese version of the refined queries!\n{extra_instruction}"
        )

        await self.clued_search(
            search_req,
            cm,
            refinery_kwargs=self.query_model,
            expand_multiplier=self.search_increment_multiplier,
            base_accepted=self.ref_limit,
            result_per_query=self.result_per_query,
            similarity_threshold=self.threshold,
            tei_endpoint=self.tei_endpoint,
        )
231
-
232
-
233
class ArticleConsultRAG(Action, AdvancedRAG):
    """Answer questions typed by the user, using references searched from the collection."""

    ctx_override: ClassVar[bool] = True
    output_key: str = "consult_count"
    search_increment_multiplier: float = 1.6
    """The multiplier to increase the limit of references to retrieve per query."""
    ref_limit: int = 26
    """The final limit of references."""
    ref_per_q: int = 13
    """The limit of references to retrieve per query."""
    similarity_threshold: float = 0.62
    """The similarity threshold of references to retrieve."""
    ref_q_model: ChooseKwargs = Field(default_factory=ChooseKwargs)
    """The model to use for refining query."""
    req: str = TYPST_CITE_USAGE
    """The request for the rag model."""
    # Passed through unchanged as `tei_endpoint` to `clued_search`.
    tei_endpoint: Optional[str] = None

    @precheck_package(
        "questionary", "`questionary` is required for supervisor mode, please install it by `fabricatio[qa]`"
    )
    async def _execute(self, collection_name: Optional[str] = None, **cxt) -> int:
        """Run the question/answer loop until the prompt returns None.

        Args:
            collection_name: Collection to search; falls back to `self.safe_target_collection`.
            **cxt: Remaining context, unused here.

        Returns:
            The number of completed rounds.
        """
        from questionary import confirm, text
        from rich import print as r_print

        self.target_collection = collection_name or self.safe_target_collection

        cm = CitationManager()
        rounds = 0

        while True:
            question = await text("User: ").ask_async()
            if question is None:
                break

            # The citation manager is kept across rounds unless the user asks to reset it.
            if await confirm("Empty the cm?").ask_async():
                cm.empty()

            question = convert_all_tex_math(question)

            await self.clued_search(
                question,
                cm,
                refinery_kwargs=self.ref_q_model,
                expand_multiplier=self.search_increment_multiplier,
                base_accepted=self.ref_limit,
                result_per_query=self.ref_per_q,
                similarity_threshold=self.similarity_threshold,
                tei_endpoint=self.tei_endpoint,
            )

            answer = await self.aask(f"{cm.as_prompt()}\n{self.req}\n{question}")
            answer = cm.apply(convert_all_tex_math(fix_misplaced_labels(answer)))

            r_print(answer)
            rounds += 1

        logger.info(f"{rounds} rounds of conversation.")
        return rounds
291
-
292
-
293
class TweakArticleRAG(Action, RAG, Censor):
    """Rewrite the subsections of an article using retrieved references and a ruleset.

    For every subsection the action refines a query, retrieves `ArticleEssence`
    documents for it, and calls `censor_obj_inplace` with those documents as reference.

    Attributes:
        output_key (str): The key used to store the output of the action.
        ruleset (Optional[RuleSet]): The ruleset to be used for censoring the article.
        ref_limit (int): Passed as `max_accepted` when retrieving references.
    """

    output_key: str = "rag_tweaked_article"
    """The key used to store the output of the action."""

    ruleset: Optional[RuleSet] = None
    """The ruleset to be used for censoring the article."""

    ref_limit: int = 30
    """The limit of references to be retrieved"""

    async def _execute(
        self,
        article: Article,
        collection_name: str = "article_essence",
        twk_rag_ruleset: Optional[RuleSet] = None,
        parallel: bool = False,
        **cxt,
    ) -> Article:
        """Tweak every subsection of the article, concurrently or one by one.

        Args:
            article (Article): The article to be processed.
            collection_name (str): The name of the collection to view for processing.
            twk_rag_ruleset (Optional[RuleSet]): The ruleset to apply. If not provided, `self.ruleset` is used.
            parallel (bool): If True, process subsections concurrently. Otherwise, process them sequentially.
            **cxt: Additional context parameters, unused here.

        Returns:
            Article: The same article object that was passed in.
        """
        self.view(collection_name)

        if parallel:
            subsections = [subsec for _, __, subsec in article.iter_subsections()]
            outcomes = await gather(
                *[
                    self._inner(article, subsec, ok(twk_rag_ruleset or self.ruleset, "No ruleset provided!"))
                    for subsec in subsections
                ],
                return_exceptions=True,
            )
            # Fix: exceptions collected by `return_exceptions=True` used to be dropped
            # without a trace; they are still not re-raised, but they are now logged.
            for subsec, outcome in zip(subsections, outcomes):
                if isinstance(outcome, BaseException):
                    logger.error(f"Failed to tweak subsection `{subsec.title}`: {outcome!r}")
        else:
            for _, __, subsec in article.iter_subsections():
                await self._inner(article, subsec, ok(twk_rag_ruleset or self.ruleset, "No ruleset provided!"))
        return article

    async def _inner(self, article: Article, subsec: ArticleSubsection, ruleset: RuleSet) -> None:
        """Retrieve references for one subsection and censor it in place.

        Args:
            article (Article): The article containing the subsection.
            subsec (ArticleSubsection): The subsection to be rewritten in place.
            ruleset (RuleSet): The ruleset to apply for censoring.
        """
        refind_q = ok(
            await self.arefined_query(
                f"{article.referenced.as_prompt()}\n# Subsection requiring reference enhancement\n{subsec.display()}\n"
            )
        )
        retrieved = await self.aretrieve(refind_q, document_model=ArticleEssence, max_accepted=self.ref_limit)
        references = "\n\n".join(d.display() for d in retrieved)
        subsec_kind = subsec.__class__.__name__
        await self.censor_obj_inplace(
            subsec,
            ruleset=ruleset,
            reference=f"{references}\n\n"
            f"You can use Reference above to rewrite the `{subsec_kind}`.\n"
            f"You should Always use `{subsec.language}` as written language, "
            f"which is the original language of the `{subsec.title}`. "
            f"since rewrite a `{subsec_kind}` in a different language is usually a bad choice",
        )
380
-
381
-
382
class ChunkArticle(Action):
    """Chunk an article into smaller chunks."""

    output_key: str = "article_chunks"
    """The key used to store the output of the action."""
    max_chunk_size: Optional[int] = None
    """The maximum size of each chunk."""
    max_overlapping_rate: Optional[float] = None
    """The maximum overlapping rate between chunks."""

    async def _execute(
        self,
        article_path: str | Path,
        bib_manager: BibManager,
        max_chunk_size: Optional[int] = None,
        max_overlapping_rate: Optional[float] = None,
        **_,
    ) -> List[ArticleChunk]:
        """Split the article file into chunks via `ArticleChunk.from_file`.

        Args:
            article_path: Path of the article file.
            bib_manager: Bibliography manager handed to `ArticleChunk.from_file`.
            max_chunk_size: Overrides `self.max_chunk_size` when truthy.
            max_overlapping_rate: Overrides `self.max_overlapping_rate` when not None.
            **_: Remaining context, ignored.

        Returns:
            The chunks produced by `ArticleChunk.from_file`.
        """
        # Fix: an explicit 0.0 is a meaningful rate, but `or` treated it as "not given"
        # and silently replaced it with the configured value.
        overlapping_rate = max_overlapping_rate if max_overlapping_rate is not None else self.max_overlapping_rate
        return ArticleChunk.from_file(
            article_path,
            bib_manager,
            # `or` is kept for the size so a 0 still falls back as before.
            max_chunk_size=ok(max_chunk_size or self.max_chunk_size, "No max_chunk_size provided!"),
            max_overlapping_rate=ok(overlapping_rate, "No max_overlapping_rate provided!"),
        )
fabricatio/actions/fs.py DELETED
@@ -1,25 +0,0 @@
1
- """A module for file system utilities."""
2
-
3
- from pathlib import Path
4
- from typing import Any, List, Mapping, Self
5
-
6
- from fabricatio.fs import safe_text_read
7
- from fabricatio.journal import logger
8
- from fabricatio.models.action import Action
9
- from fabricatio.models.generic import FromMapping
10
-
11
-
12
class ReadText(Action, FromMapping):
    """Action that reads the text of one file and returns it."""

    output_key: str = "read_text"
    read_path: str | Path
    """Path to the file to read."""

    async def _execute(self, *_: Any, **cxt) -> str:
        """Log the source path and output key, then return the file content."""
        source = Path(self.read_path).as_posix()
        logger.info(f"Read text from {source} to {self.output_key}")
        return safe_text_read(self.read_path)

    @classmethod
    def from_mapping(cls, mapping: Mapping[str, str | Path], **kwargs: Any) -> List[Self]:
        """Build one `ReadText` per mapping entry; keys are output keys, values are paths."""
        actions: List[Self] = []
        for key, path in mapping.items():
            actions.append(cls(read_path=path, output_key=key, **kwargs))
        return actions
@@ -1,248 +0,0 @@
1
- """Dump the finalized output to a file."""
2
-
3
- from pathlib import Path
4
- from typing import Any, Iterable, List, Mapping, Optional, Self, Sequence, Type
5
-
6
- from fabricatio.capabilities.persist import PersistentAble
7
- from fabricatio.fs import dump_text
8
- from fabricatio.journal import logger
9
- from fabricatio.models.action import Action
10
- from fabricatio.models.generic import FinalizedDumpAble, FromMapping, FromSequence
11
- from fabricatio.models.task import Task
12
- from fabricatio.models.usages import LLMUsage
13
- from fabricatio.rust import TEMPLATE_MANAGER
14
- from fabricatio.utils import ok
15
-
16
-
17
class DumpFinalizedOutput(Action, LLMUsage):
    """Dump the finalized output to a file."""

    output_key: str = "dump_path"
    dump_path: Optional[str] = None

    async def _execute(
        self,
        to_dump: FinalizedDumpAble,
        task_input: Optional[Task] = None,
        dump_path: Optional[str | Path] = None,
        **_,
    ) -> str:
        """Write the finalized dump of `to_dump` to a file and return its path.

        The path is taken from the `dump_path` argument, then from `self.dump_path`,
        and otherwise the model is asked to extract one from the task briefing.

        Args:
            to_dump: The object whose `finalized_dump_to` is called.
            task_input: Task whose briefing is used when no path is configured.
            dump_path: Explicit target path.
            **_: Remaining context, ignored.

        Returns:
            The target path in POSIX form.
        """
        dump_path = Path(
            dump_path
            or self.dump_path
            or ok(
                await self.awhich_pathstr(
                    # Fix: grammar of the message ("Neither ... nor").
                    f"{ok(task_input, 'Neither `task_input` nor `dump_path` is provided.').briefing}\n\nExtract a single path of the file, to which I will dump the data."
                ),
                "Could not find the path of file to dump the data.",
            )
        )
        logger.info(f"Saving output to {dump_path.as_posix()}")
        # Fix: this check guards `to_dump`, but the old message blamed a missing path.
        ok(to_dump, "Could not dump the data since `to_dump` is not provided.").finalized_dump_to(dump_path)
        return dump_path.as_posix()
43
-
44
-
45
class RenderedDump(Action, LLMUsage):
    """Render an object through a named template and write the result to a file."""

    output_key: str = "dump_path"
    dump_path: Optional[str] = None

    template_name: str
    """The template name to render the data."""

    async def _execute(
        self,
        to_dump: FinalizedDumpAble,
        task_input: Optional[Task] = None,
        dump_path: Optional[str | Path] = None,
        **_,
    ) -> str:
        """Render `to_dump` with `self.template_name` and save the text.

        The path comes from the `dump_path` argument, then `self.dump_path`, and
        otherwise from the model, which is asked to extract one from the task briefing.

        Returns:
            The target path in POSIX form.
        """
        chosen = dump_path or self.dump_path
        if not chosen:
            briefing = ok(task_input, 'Neither `task_input` and `dump_path` is provided.').briefing
            chosen = ok(
                await self.awhich_pathstr(
                    f"{briefing}\n\nExtract a single path of the file, to which I will dump the data."
                ),
                "Could not find the path of file to dump the data.",
            )
        target = Path(chosen)

        logger.info(f"Saving output to {target.as_posix()}")

        # The template receives the dump under the class name of the dumped object.
        template_data = {to_dump.__class__.__name__: to_dump.finalized_dump()}
        rendered = TEMPLATE_MANAGER.render_template(self.template_name, template_data)
        dump_text(target, rendered)

        return target.as_posix()
81
-
82
-
83
class PersistentAll(Action, LLMUsage):
    """Persist every `PersistentAble` object found in the context to a directory."""

    output_key: str = "persistent_count"
    """The number of objects persisted."""
    persist_dir: Optional[str] = None
    """The directory to persist the data."""
    override: bool = False
    """Whether to remove the existing dir before dumping."""

    async def _execute(
        self,
        task_input: Optional[Task] = None,
        persist_dir: Optional[str | Path] = None,
        **cxt,
    ) -> int:
        """Persist all persistable context values below `persist_dir`.

        Each context key gets its own sub-directory. A value is persisted when it is a
        `PersistentAble`, and its items are persisted when it is an iterable that
        contains `PersistentAble` objects.

        Args:
            task_input: Task whose briefing is used when no directory is configured.
            persist_dir: Explicit target directory; falls back to `self.persist_dir`.
            **cxt: The context values to scan.

        Returns:
            The number of objects persisted; 0 when the target is an existing file.
        """
        from shutil import rmtree

        persist_dir = Path(
            persist_dir
            or self.persist_dir
            or ok(
                await self.awhich_pathstr(
                    # Fix: the message named `dump_path`, which this action does not have.
                    f"{ok(task_input, 'Neither `task_input` nor `persist_dir` is provided.').briefing}\n\nExtract a single path of the file, to which I will persist the data."
                ),
                "Can not find the path of file to persist the data.",
            )
        )

        count = 0
        if persist_dir.is_file():
            logger.warning("Dump should be a directory, but it is a file. Skip dumping.")
            return count
        if self.override and persist_dir.is_dir():
            logger.info(f"Override the existing directory {persist_dir.as_posix()}.")
            # Fix: `Path.rmdir()` only removes empty directories, so overriding a
            # populated directory raised instead of clearing it.
            rmtree(persist_dir)
        logger.info(f"Starting persistence in directory {persist_dir}")
        for k, v in cxt.items():
            final_dir = persist_dir.joinpath(k)
            logger.debug(f"Checking key {k} for persistence")
            if isinstance(v, PersistentAble):
                logger.info(f"Persisting object {k} to {final_dir}")
                final_dir.mkdir(parents=True, exist_ok=True)
                v.persist(final_dir)
                count += 1
            if isinstance(v, Iterable):
                # Fix: the items used to be held in a generator that `any()` had already
                # advanced, so the first persistable item was never written.
                persistent_ables = [pers for pers in v if isinstance(pers, PersistentAble)]
                if persistent_ables:
                    logger.info(f"Persisting collection {k} to {final_dir}")
                    final_dir.mkdir(parents=True, exist_ok=True)
                    for per in persistent_ables:
                        per.persist(final_dir)
                        count += 1
        logger.info(f"Persisted {count} objects to {persist_dir}")
        return count
136
-
137
-
138
- class RetrieveFromPersistent[T: PersistentAble](Action):
139
- """Retrieve the object from the persistent file."""
140
-
141
- output_key: str = "retrieved_obj"
142
- """Retrieve the object from the persistent file."""
143
- load_path: str
144
- """The path of the persistent file or directory contains multiple file."""
145
- retrieve_cls: Type[T]
146
- """The class of the object to retrieve."""
147
-
148
- async def _execute(self, /, **_) -> Optional[T | List[T]]:
149
- logger.info(f"Retrieve `{self.retrieve_cls.__name__}` from {self.load_path}")
150
- if not (p := Path(self.load_path)).exists():
151
- logger.warning(f"Path {self.load_path} does not exist")
152
- return None
153
-
154
- if p.is_dir():
155
- logger.info(f"Found directory with {len(list(p.glob('*')))} items")
156
- return [self.retrieve_cls.from_persistent(per) for per in p.glob("*")]
157
- return self.retrieve_cls.from_persistent(self.load_path)
158
-
159
-
160
- class RetrieveFromLatest[T: PersistentAble](RetrieveFromPersistent[T], FromMapping):
161
- """Retrieve the object from the latest persistent file in the dir at `load_path`."""
162
-
163
- async def _execute(self, /, **_) -> Optional[T]:
164
- logger.info(f"Retrieve latest `{self.retrieve_cls.__name__}` from {self.load_path}")
165
- if not (p := Path(self.load_path)).exists():
166
- logger.warning(f"Path {self.load_path} does not exist")
167
- return None
168
-
169
- if p.is_dir():
170
- logger.info(f"Found directory with {len(list(p.glob('*')))} items")
171
- return self.retrieve_cls.from_latest_persistent(self.load_path)
172
- logger.error(f"Path {self.load_path} is not a directory")
173
- return None
174
-
175
- @classmethod
176
- def from_mapping(
177
- cls,
178
- mapping: Mapping[str, str | Path],
179
- *,
180
- retrieve_cls: Type[T],
181
- **kwargs,
182
- ) -> List["RetrieveFromLatest[T]"]:
183
- """Create a list of `RetrieveFromLatest` from the mapping."""
184
- return [
185
- cls(retrieve_cls=retrieve_cls, load_path=Path(p).as_posix(), output_key=o, **kwargs)
186
- for o, p in mapping.items()
187
- ]
188
-
189
-
190
class GatherAsList(Action):
    """Collect context values whose keys match a suffix or a prefix into a list.

    Notes:
        The suffix takes precedence: when `gather_suffix` is set, `gather_prefix` is ignored.
    """

    output_key: str = "gathered"
    """Gather the objects from the context as a list."""
    gather_suffix: Optional[str] = None
    """Gather the objects from the context as a list."""
    gather_prefix: Optional[str] = None
    """Gather the objects from the context as a list."""

    async def _execute(self, **cxt) -> List[Any]:
        """Return the matching context values in context order.

        Raises:
            ValueError: If neither `gather_suffix` nor `gather_prefix` is set.
        """
        suffix = self.gather_suffix
        prefix = self.gather_prefix

        if suffix is None and prefix is None:
            logger.error(err := "Either `gather_suffix` or `gather_prefix` must be specified.")
            raise ValueError(err)

        if suffix is not None:
            gathered = [value for key, value in cxt.items() if key.endswith(suffix)]
            logger.debug(f"Gathered {len(gathered)} items with suffix {suffix}")
            return gathered

        gathered = [value for key, value in cxt.items() if key.startswith(prefix)]
        logger.debug(f"Gathered {len(gathered)} items with prefix {prefix}")
        return gathered
215
-
216
-
217
class Forward(Action, FromMapping, FromSequence):
    """Return a context value, looked up under `original`, as this action's output."""

    output_key: str = "forwarded"
    """Gather the objects from the context as a list."""
    # Context key whose value is returned.
    original: str

    async def _execute(self, *_: Any, **cxt) -> Any:
        """Return the value stored under `self.original`, warning when it is None or absent."""
        forwarded = cxt.get(self.original)
        if forwarded is None:
            logger.warning(f"Original object {self.original} not found in the context")
        return forwarded

    @classmethod
    def from_sequence(cls, sequence: Sequence[str], *, original: str, **kwargs: Any) -> List[Self]:
        """Build one `Forward` per output key, all reading from the same `original` key."""
        forwards: List[Self] = []
        for out_key in sequence:
            forwards.append(cls(original=original, output_key=out_key, **kwargs))
        return forwards

    @classmethod
    def from_mapping(cls, mapping: Mapping[str, str | Sequence[str]], **kwargs: Any) -> List[Self]:
        """Build `Forward` actions from a mapping of original key to output key(s).

        Values that are neither a string nor a sequence are skipped with a warning.
        """
        forwards = []
        for original_key, output_val in mapping.items():
            if isinstance(output_val, str):
                # A plain string is one output key, not a sequence of characters.
                targets = [output_val]
            elif isinstance(output_val, Sequence):
                targets = output_val
            else:
                logger.warning(
                    f"Invalid type for output key value in mapping: {type(output_val)} for original key {original_key}. Expected str or Sequence[str]."
                )
                continue
            forwards.extend(cls(original=original_key, output_key=target, **kwargs) for target in targets)
        return forwards