fabricatio 0.2.1.dev0__cp313-cp313-win_amd64.whl → 0.3.14__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. fabricatio/__init__.py +12 -20
  2. fabricatio/actions/__init__.py +1 -5
  3. fabricatio/actions/article.py +356 -0
  4. fabricatio/actions/article_rag.py +407 -0
  5. fabricatio/actions/fs.py +25 -0
  6. fabricatio/actions/output.py +248 -0
  7. fabricatio/actions/rag.py +96 -0
  8. fabricatio/actions/rules.py +83 -0
  9. fabricatio/capabilities/__init__.py +1 -0
  10. fabricatio/capabilities/advanced_judge.py +20 -0
  11. fabricatio/capabilities/advanced_rag.py +61 -0
  12. fabricatio/capabilities/censor.py +105 -0
  13. fabricatio/capabilities/check.py +212 -0
  14. fabricatio/capabilities/correct.py +228 -0
  15. fabricatio/capabilities/extract.py +74 -0
  16. fabricatio/capabilities/persist.py +103 -0
  17. fabricatio/capabilities/propose.py +65 -0
  18. fabricatio/capabilities/rag.py +264 -0
  19. fabricatio/capabilities/rating.py +404 -0
  20. fabricatio/capabilities/review.py +114 -0
  21. fabricatio/capabilities/task.py +113 -0
  22. fabricatio/decorators.py +253 -179
  23. fabricatio/{core.py → emitter.py} +31 -21
  24. fabricatio/fs/__init__.py +32 -2
  25. fabricatio/fs/curd.py +32 -9
  26. fabricatio/fs/readers.py +44 -7
  27. fabricatio/journal.py +3 -19
  28. fabricatio/models/action.py +185 -61
  29. fabricatio/models/adv_kwargs_types.py +63 -0
  30. fabricatio/models/extra/__init__.py +1 -0
  31. fabricatio/models/extra/advanced_judge.py +32 -0
  32. fabricatio/models/extra/aricle_rag.py +286 -0
  33. fabricatio/models/extra/article_base.py +455 -0
  34. fabricatio/models/extra/article_essence.py +101 -0
  35. fabricatio/models/extra/article_main.py +286 -0
  36. fabricatio/models/extra/article_outline.py +46 -0
  37. fabricatio/models/extra/article_proposal.py +52 -0
  38. fabricatio/models/extra/patches.py +20 -0
  39. fabricatio/models/extra/problem.py +165 -0
  40. fabricatio/models/extra/rag.py +98 -0
  41. fabricatio/models/extra/rule.py +52 -0
  42. fabricatio/models/generic.py +704 -36
  43. fabricatio/models/kwargs_types.py +112 -17
  44. fabricatio/models/role.py +77 -26
  45. fabricatio/models/task.py +94 -60
  46. fabricatio/models/tool.py +328 -188
  47. fabricatio/models/usages.py +791 -515
  48. fabricatio/parser.py +81 -60
  49. fabricatio/rust.cp313-win_amd64.pyd +0 -0
  50. fabricatio/rust.pyi +843 -0
  51. fabricatio/toolboxes/__init__.py +1 -3
  52. fabricatio/toolboxes/fs.py +17 -1
  53. fabricatio/utils.py +156 -0
  54. fabricatio/workflows/__init__.py +1 -0
  55. fabricatio/workflows/articles.py +24 -0
  56. fabricatio/workflows/rag.py +11 -0
  57. fabricatio-0.3.14.data/scripts/tdown.exe +0 -0
  58. fabricatio-0.3.14.data/scripts/ttm.exe +0 -0
  59. fabricatio-0.3.14.dist-info/METADATA +189 -0
  60. fabricatio-0.3.14.dist-info/RECORD +64 -0
  61. {fabricatio-0.2.1.dev0.dist-info → fabricatio-0.3.14.dist-info}/WHEEL +1 -1
  62. fabricatio/_rust.cp313-win_amd64.pyd +0 -0
  63. fabricatio/_rust.pyi +0 -53
  64. fabricatio/_rust_instances.py +0 -8
  65. fabricatio/actions/communication.py +0 -15
  66. fabricatio/actions/transmission.py +0 -23
  67. fabricatio/config.py +0 -263
  68. fabricatio/models/advanced.py +0 -128
  69. fabricatio/models/events.py +0 -82
  70. fabricatio/models/utils.py +0 -78
  71. fabricatio/toolboxes/task.py +0 -6
  72. fabricatio-0.2.1.dev0.data/scripts/tdown.exe +0 -0
  73. fabricatio-0.2.1.dev0.dist-info/METADATA +0 -420
  74. fabricatio-0.2.1.dev0.dist-info/RECORD +0 -35
  75. {fabricatio-0.2.1.dev0.dist-info → fabricatio-0.3.14.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,63 @@
1
+ """A module containing kwargs types for content correction and checking operations."""
2
+
3
+ from importlib.util import find_spec
4
+ from typing import NotRequired, Optional, TypedDict
5
+
6
+ from fabricatio.models.extra.problem import Improvement
7
+ from fabricatio.models.extra.rule import RuleSet
8
+ from fabricatio.models.generic import SketchedAble
9
+ from fabricatio.models.kwargs_types import ReferencedKwargs
10
+
11
+
12
class CorrectKwargs[T: SketchedAble](ReferencedKwargs[T], total=False):
    """Arguments for content correction operations.

    Extends ``ReferencedKwargs[T]`` with the improvement that a correction is
    based on. The class is declared with ``total=False``, so the key added here
    is optional.
    """

    # NOTE(review): presumably the improvement to apply to the content being
    # corrected -- inferred from the name; confirm against the consumer.
    improvement: Improvement
20
+
21
+
22
class CheckKwargs(ReferencedKwargs[Improvement], total=False):
    """Arguments for content checking operations.

    Extends ``ReferencedKwargs[Improvement]`` with the rule set a check is run
    against. The class is declared with ``total=False``, so the key added here
    is optional.
    """

    # NOTE(review): presumably the rules the content is checked against --
    # inferred from the name; confirm against the consumer.
    ruleset: RuleSet
30
+
31
+
32
# Only define the vector-store kwargs when the optional ``pymilvus`` package can
# be located; without it the names below do not exist in this module.
# NOTE(review): the diff view lost indentation. ``FetchKwargs`` is placed inside
# this ``if`` because only a single blank line separates it from the class
# above -- confirm against the real file.
if find_spec("pymilvus"):
    from pymilvus import CollectionSchema
    from pymilvus.milvus_client import IndexParams

    class CollectionConfigKwargs(TypedDict, total=False):
        """Configuration parameters for a vector collection.

        These arguments are typically used when configuring connections to vector databases.
        """

        # All keys are optional (``total=False``); several additionally accept None.
        dimension: int | None
        primary_field_name: str
        id_type: str
        vector_field_name: str
        metric_type: str
        timeout: float | None
        schema: CollectionSchema | None
        index_params: IndexParams | None

    class FetchKwargs(TypedDict):
        """Arguments for fetching data from vector collections.

        Controls how data is retrieved from vector databases, including filtering
        and result limiting parameters.
        """

        # Every key is individually marked ``NotRequired``, so an empty dict is valid.
        collection_name: NotRequired[str | None]
        similarity_threshold: NotRequired[float]
        result_per_query: NotRequired[int]
        tei_endpoint: NotRequired[Optional[str]]
        reranker_threshold: NotRequired[float]
        filter_expr: NotRequired[str]
@@ -0,0 +1 @@
1
+ """A module contains extra models for fabricatio."""
@@ -0,0 +1,32 @@
1
+ """Module containing the JudgeMent class for holding judgment results."""
2
+
3
+ from typing import List
4
+
5
+ from fabricatio.models.generic import SketchedAble
6
+
7
+
8
class JudgeMent(SketchedAble):
    """Represents a judgment result containing supporting/denying evidence and final verdict.

    The class stores both affirmative and denies evidence, truth and reasons lists along with the final boolean judgment.
    """

    # NOTE(review): the class docstring and the strings under each field are kept
    # verbatim. If the base model exposes them as schema/field descriptions they
    # are runtime data -- confirm before rewording.
    issue_to_judge: str
    """The issue to be judged, including the original question and context"""

    deny_evidence: List[str]
    """List of clues supporting the denial."""

    affirm_evidence: List[str]
    """List of clues supporting the affirmation."""

    final_judgement: bool
    """The final judgment made according to all extracted clues. true for the `issue_to_judge` is correct and false for incorrect."""

    def __bool__(self) -> bool:
        """Return the final judgment value.

        Returns:
            bool: The stored final_judgement value indicating the judgment result.
        """
        # Lets an instance be used directly in a condition, e.g. ``if judgement:``.
        return self.final_judgement
@@ -0,0 +1,286 @@
1
+ """A Module containing the article rag models."""
2
+
3
+ import re
4
+ from dataclasses import dataclass, field
5
+ from itertools import groupby
6
+ from pathlib import Path
7
+ from typing import ClassVar, Dict, List, Optional, Self, Unpack
8
+
9
+ from fabricatio.fs import safe_text_read
10
+ from fabricatio.journal import logger
11
+ from fabricatio.models.extra.rag import MilvusDataBase
12
+ from fabricatio.models.generic import AsPrompt
13
+ from fabricatio.models.kwargs_types import ChunkKwargs
14
+ from fabricatio.rust import BibManager, blake3_hash, split_into_chunks
15
+ from fabricatio.utils import ok, wrapp_in_block
16
+ from more_itertools.more import first
17
+ from more_itertools.recipes import flatten, unique
18
+ from pydantic import Field
19
+
20
+
21
class ArticleChunk(MilvusDataBase):
    """The chunk of an article."""

    # Word appended after the third surname when there are more than three authors.
    etc_word: ClassVar[str] = "等"
    # Conjunction placed before the last surname in the formatted author list.
    and_word: ClassVar[str] = "与"
    # Citation number; stays None until ``update_cite_number`` is called.
    _cite_number: Optional[int] = None

    # Headings tried in this order by ``strip``; text up to and including the
    # first occurrence of the first heading found is dropped.
    head_split: ClassVar[List[str]] = [
        "引 言",
        "引言",
        "绪 论",
        "绪论",
        "前言",
        "INTRODUCTION",
        "Introduction",
    ]
    # Headings tried in this order by ``strip``; text from the last occurrence of
    # the first heading found onwards is dropped.
    # NOTE(review): the first two entries look identical in this view; they may
    # differ in whitespace characters that the diff rendering normalised -- confirm.
    tail_split: ClassVar[List[str]] = [
        "参 考 文 献",
        "参 考 文 献",
        "参考文献",
        "REFERENCES",
        "References",
        "Bibliography",
        "Reference",
    ]
    chunk: str
    """The segment of the article"""
    year: int
    """The year of the article"""
    authors: List[str] = Field(default_factory=list)
    """The authors of the article"""
    article_title: str
    """The title of the article"""
    bibtex_cite_key: str
    """The bibtex cite key of the article"""
56
+
57
@property
def reference_header(self) -> str:
    """Build the header that introduces this chunk's article in a reference list.

    Returns:
        str: ``[[<cite number>]] reference `<title>` from <author sequence>``.

    Note:
        The cite number goes through ``ok`` first; presumably that raises when
        no number has been assigned yet -- confirm against ``fabricatio.utils``.
    """
    number = ok(self._cite_number, "You need to update cite number first.")
    title = self.article_title
    author_seq = self.as_auther_seq()
    return f"[[{number}]] reference `{title}` from {author_seq}"
61
+
62
@property
def cite_number(self) -> int:
    """Return the citation number assigned to this chunk.

    Returns:
        int: The value stored by ``update_cite_number``, passed through ``ok``
        with the message ``"cite number not set"``.
    """
    assigned = self._cite_number
    return ok(assigned, "cite number not set")
66
+
67
+ def _prepare_vectorization_inner(self) -> str:
68
+ return self.chunk
69
+
70
+ @classmethod
71
+ def from_file[P: str | Path](
72
+ cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
73
+ ) -> List[Self]:
74
+ """Load the article chunks from the file."""
75
+ if isinstance(path, list):
76
+ result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
77
+ logger.debug(f"Number of chunks created from list of files: {len(result)}")
78
+ return result
79
+
80
+ return cls._from_file_inner(path, bib_mgr, **kwargs)
81
+
82
@classmethod
def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
    """Split a single article file into chunks tagged with its bibliography data.

    Args:
        path: Location of the article text file.
        bib_mgr: Bibliography manager used to look up cite key, authors, year and title.
        **kwargs: Chunking options forwarded to ``split_into_chunks``.

    Returns:
        List[Self]: One instance per chunk, or an empty list when no cite key
        could be resolved for the file.
    """
    source = Path(path)

    # The part of the file stem after the last " - " is tried as the article title.
    title_guess = source.stem.split(" - ")[-1]

    # Exact title match first, then fuzzy title match, then a fuzzy match on the whole stem.
    cite_key = (
        bib_mgr.get_cite_key_by_title(title_guess)
        or bib_mgr.get_cite_key_by_title_fuzzy(title_guess)
        or bib_mgr.get_cite_key_fuzzy(source.stem)
    )
    if cite_key is None:
        logger.warning(f"no cite key found for {source.as_posix()}, skip.")
        return []

    authors = ok(bib_mgr.get_author_by_key(cite_key), f"no author found for {cite_key}")
    year = ok(bib_mgr.get_year_by_key(cite_key), f"no year found for {cite_key}")
    article_title = ok(bib_mgr.get_title_by_key(cite_key), f"no title found for {cite_key}")

    # Trim front/back matter, then drop inline numeric citations before chunking.
    cleaned = cls.purge_numeric_citation(cls.strip(safe_text_read(source)))
    chunks = [
        cls(chunk=segment, year=year, authors=authors, article_title=article_title, bibtex_cite_key=cite_key)
        for segment in split_into_chunks(cleaned, **kwargs)
    ]

    logger.debug(f"Number of chunks created from file {source.as_posix()}: {len(chunks)}")
    return chunks
107
+
108
@classmethod
def strip(cls, string: str) -> str:
    """Cut the text before the introduction heading and after the references heading.

    The first entry of ``head_split`` found in the text is used: everything up
    to and including its first occurrence is removed. Then the first entry of
    ``tail_split`` found in the remaining text is used: everything from its last
    occurrence onwards is removed. A warning is logged for each step that
    removes nothing.

    Args:
        string: Full article text.

    Returns:
        str: The text between the two headings.
    """
    original = len(string)
    logger.debug(f"String length before strip: {original}")

    head_marker = next((s for s in cls.head_split if s in string), None)
    if head_marker is not None:
        logger.debug(f"Strip head using {head_marker}")
        # Keep what follows the first occurrence of the marker.
        string = string.partition(head_marker)[2]
    stripped_len = len(string)
    d = original - stripped_len
    logger.debug(f"String length after head strip: {stripped_len}, decreased by {d}")
    if not d:
        logger.warning("No decrease at head strip, which is might be abnormal.")

    tail_marker = next((s for s in cls.tail_split if s in string), None)
    if tail_marker is not None:
        logger.debug(f"Strip tail using {tail_marker}")
        # Keep what precedes the last occurrence of the marker.
        string = string.rpartition(tail_marker)[0]
    d = stripped_len - len(string)
    logger.debug(f"String length after tail strip: {len(string)}, decreased by {d}")
    if not d:
        logger.warning("No decrease at tail strip, which is might be abnormal.")

    return string
132
+
133
def as_typst_cite(self) -> str:
    """Render this chunk's article as a Typst citation call.

    Returns:
        str: ``#cite(<KEY>)`` where ``KEY`` is the BibTeX cite key.
    """
    key = self.bibtex_cite_key
    return f"#cite(<{key}>)"
136
+
137
+ @staticmethod
138
+ def purge_numeric_citation(string: str) -> str:
139
+ """Purge numeric citation."""
140
+ import re
141
+
142
+ return re.sub(r"\[[\d\s,\\~–-]+]", "", string)
143
+
144
@property
def auther_lastnames(self) -> List[str]:
    """Return the last whitespace-separated word of every author name.

    Returns:
        List[str]: One entry per author, in the same order as ``authors``.
    """
    lastnames = []
    for name in self.authors:
        # Splitting from the right on whitespace leaves the final word last.
        lastnames.append(name.rsplit(None, 1)[-1])
    return lastnames
148
+
149
def as_auther_seq(self) -> str:
    """Format the authors, year and Typst citation as an in-text reference.

    The layout depends on the number of authors: one surname; two surnames
    joined by ``and_word``; three surnames with ``and_word`` before the last;
    for more than three, the first three surnames followed by ``etc_word``.

    Returns:
        str: ``(<surnames>,<year>)`` immediately followed by the Typst cite call.

    Raises:
        ValueError: If the chunk has no authors.
    """
    count = len(self.authors)
    if count == 0:
        raise ValueError("No authors found")

    names = self.auther_lastnames
    if count == 1:
        listing = f"{names[0]}"
    elif count == 2:
        listing = f"{names[0]}{self.and_word}{names[1]}"
    elif count == 3:
        listing = f"{names[0]},{names[1]}{self.and_word}{names[2]}"
    else:
        listing = f"{names[0]},{names[1]}{self.and_word}{names[2]}{self.etc_word}"
    return f"({listing},{self.year}){self.as_typst_cite()}"
162
+
163
+ def update_cite_number(self, cite_number: int) -> Self:
164
+ """Update the cite number."""
165
+ self._cite_number = cite_number
166
+ return self
167
+
168
+
169
@dataclass
class CitationManager(AsPrompt):
    """Citation manager."""

    # Each instance gets its own list (``default_factory``), never a shared one.
    article_chunks: List[ArticleChunk] = field(default_factory=list)
    """Article chunks."""

    # Group 1 captures the whole ``[[...]]`` marker, group 2 the text between the
    # double brackets (digits, whitespace, commas and hyphens only; may be empty).
    pat: str = r"(\[\[([\d\s,-]*)]])"
    """Regex pattern to match citations."""
    # Splits the text inside a marker into individual notations, e.g. "1,3-5".
    sep: str = ","
    """Separator for citation numbers."""
    # Marks an inclusive range inside one notation, e.g. "3-5" -> 3, 4, 5.
    abbr_sep: str = "-"
    """Separator for abbreviated citation numbers."""
182
+
183
+ def update_chunks(
184
+ self, article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True
185
+ ) -> Self:
186
+ """Update article chunks."""
187
+ self.article_chunks.clear()
188
+ self.article_chunks.extend(article_chunks)
189
+ if dedup:
190
+ self.article_chunks = list(unique(self.article_chunks, lambda c: blake3_hash(c.chunk.encode())))
191
+ if set_cite_number:
192
+ self.set_cite_number_all()
193
+ return self
194
+
195
+ def empty(self) -> Self:
196
+ """Empty the article chunks."""
197
+ self.article_chunks.clear()
198
+ return self
199
+
200
+ def add_chunks(self, article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True) -> Self:
201
+ """Add article chunks."""
202
+ self.article_chunks.extend(article_chunks)
203
+ if dedup:
204
+ self.article_chunks = list(unique(self.article_chunks, lambda c: blake3_hash(c.chunk.encode())))
205
+ if set_cite_number:
206
+ self.set_cite_number_all()
207
+ return self
208
+
209
+ def set_cite_number_all(self) -> Self:
210
+ """Set citation numbers for all article chunks."""
211
+ number_mapping = {a.bibtex_cite_key: 0 for a in self.article_chunks}
212
+
213
+ for i, k in enumerate(number_mapping.keys()):
214
+ number_mapping[k] = i
215
+
216
+ for a in self.article_chunks:
217
+ a.update_cite_number(number_mapping[a.bibtex_cite_key])
218
+ return self
219
+
220
def _as_prompt_inner(self) -> Dict[str, str]:
    """Render the tracked chunks as one reference block per article.

    Chunks are grouped by BibTeX cite key in order of first appearance. Each
    group's texts are joined with blank lines and wrapped via ``wrapp_in_block``
    under the reference header of the group's first chunk.

    Returns:
        Dict[str, str]: A single ``"References"`` entry holding all blocks,
        one per line-separated segment.
    """
    # ``itertools.groupby`` only merges *adjacent* equal keys, so chunks of one
    # article that are not stored next to each other would be emitted as several
    # blocks with the same header. Group through a dict instead.
    grouped: Dict[str, List[ArticleChunk]] = {}
    for chunk in self.article_chunks:
        grouped.setdefault(chunk.bibtex_cite_key, []).append(chunk)

    seg = []
    for key, members in grouped.items():
        logger.debug(f"Group [{key}]: {len(members)}")
        seg.append(wrapp_in_block("\n\n".join(a.chunk for a in members), members[0].reference_header))
    return {"References": "\n".join(seg)}
229
+
230
def apply(self, string: str) -> str:
    """Replace every ``[[...]]`` citation marker with Typst cite calls.

    Each marker matched by ``pat`` is split into notations, expanded into
    citation numbers, reduced by ``deduplicate_citation`` and then substituted
    by the output of ``unpack_cite_seq``.

    Args:
        string: Text containing numeric citation markers.

    Returns:
        str: The text with the markers replaced.
    """
    for origin, inner in re.findall(self.pat, string):
        logger.info(f"Matching citation: {inner}")

        notations = self.convert_to_numeric_notations(inner)
        logger.info(f"Citing Notations: {notations}")

        citation_number_seq = [number for notation in notations for number in self.decode_expr(notation)]
        logger.info(f"Citation Number Sequence: {citation_number_seq}")

        dedup = self.deduplicate_citation(citation_number_seq)
        logger.info(f"Deduplicated Citation Number Sequence: {dedup}")

        replacement = self.unpack_cite_seq(dedup)
        string = string.replace(origin, replacement)
    return string
242
+
243
def citation_count(self, string: str) -> int:
    """Count the citations made in ``string``.

    For every marker matched by ``pat`` the notations are expanded into
    citation numbers and reduced by ``deduplicate_citation``; the sizes of the
    reduced lists are summed over all markers.

    Args:
        string: Text containing numeric citation markers.

    Returns:
        int: Sum of the per-marker deduplicated citation counts.
    """
    total = 0
    for _, inner in re.findall(self.pat, string):
        logger.info(f"Matching citation: {inner}")

        notations = self.convert_to_numeric_notations(inner)
        logger.info(f"Citing Notations: {notations}")

        citation_number_seq = [number for notation in notations for number in self.decode_expr(notation)]
        logger.info(f"Citation Number Sequence: {citation_number_seq}")

        dedup = self.deduplicate_citation(citation_number_seq)
        total += len(dedup)
        logger.info(f"Deduplicated Citation Number Sequence: {dedup}")
    return total
255
+
256
def citation_coverage(self, string: str) -> float:
    """Return the citation count of ``string`` divided by the number of tracked chunks.

    Args:
        string: Text containing numeric citation markers.

    Returns:
        float: ``citation_count(string) / len(article_chunks)``, or ``0.0`` when
        no chunks are tracked.
    """
    total = len(self.article_chunks)
    if not total:
        # Nothing can be covered without chunks; avoid a ZeroDivisionError.
        return 0.0
    return self.citation_count(string) / total
259
+
260
def decode_expr(self, string: str) -> List[int]:
    """Decode one citation notation into the citation numbers it stands for.

    Args:
        string: A single number (``"7"``) or an inclusive range written with
            ``abbr_sep`` (``"3-5"``). Surrounding whitespace is ignored.

    Returns:
        List[int]: The numbers denoted; empty for a blank notation.

    Raises:
        ValueError: If a non-blank notation is not a number or a two-ended range.
    """
    expr = string.strip()
    if not expr:
        # ``pat`` also matches markers such as ``[[]]`` or ``[[1,]]``, which yield
        # blank notations; they cite nothing rather than being an error.
        return []
    if self.abbr_sep in expr:
        start, _, end = expr.partition(self.abbr_sep)
        return list(range(int(start), int(end) + 1))
    return [int(expr)]
266
+
267
def convert_to_numeric_notations(self, string: str) -> List[str]:
    """Split the inner text of a citation marker into individual notations.

    Args:
        string: Text found between the double brackets, e.g. ``"1, 3-5"``.

    Returns:
        List[str]: The pieces separated by ``sep``, each stripped of
        surrounding whitespace.
    """
    pieces = string.split(self.sep)
    return list(map(str.strip, pieces))
270
+
271
def deduplicate_citation(self, citation_seq: List[int]) -> List[int]:
    """Reduce citation numbers to one number per cited article.

    Selects the tracked chunks whose cite number occurs in ``citation_seq``,
    keeps one chunk per BibTeX cite key via ``unique`` and returns the cite
    numbers of the chunks kept.

    Args:
        citation_seq: Citation numbers, possibly with repeats or numbers that
            belong to no tracked chunk.

    Returns:
        List[int]: Cite numbers of the chunks kept, in the order ``unique``
        yields them.
    """
    # Build the lookup once instead of scanning the list for every chunk.
    wanted = set(citation_seq)
    chunk_seq = [a for a in self.article_chunks if a.cite_number in wanted]
    deduped = unique(chunk_seq, lambda a: a.bibtex_cite_key)
    return [a.cite_number for a in deduped]
276
+
277
def unpack_cite_seq(self, citation_seq: List[int]) -> str:
    """Turn citation numbers into concatenated Typst cite calls.

    Args:
        citation_seq: Citation numbers to render.

    Returns:
        str: One cite call per distinct cite key among the tracked chunks whose
        cite number is in ``citation_seq``, in order of first appearance and
        joined without a separator.
    """
    by_key = {}
    for chunk in self.article_chunks:
        if chunk.cite_number in citation_seq:
            # A later chunk with the same key replaces the value but keeps the slot.
            by_key[chunk.bibtex_cite_key] = chunk

    cites = [chunk.as_typst_cite() for chunk in by_key.values()]
    return "".join(cites)
281
+
282
def as_milvus_filter_expr(self, blacklist: bool = True) -> str:
    """Build a filter expression over ``bibtex_cite_key`` from the tracked chunks.

    This only builds a string; it performs no database access.

    Args:
        blacklist: When true, build an expression that excludes every tracked
            cite key (``!=`` clauses joined by ``and``); otherwise one that
            selects only the tracked keys (``==`` clauses joined by ``or``).

    Returns:
        str: The expression, or an empty string when no chunks are tracked.

    Note:
        Cite keys are interpolated as-is between double quotes; a key that
        itself contains a double quote would produce a malformed expression.
    """
    # Many chunks share one article; emit each cite key once, in first-seen order.
    keys = dict.fromkeys(a.bibtex_cite_key for a in self.article_chunks)
    operator, joiner = ("!=", " and ") if blacklist else ("==", " or ")
    return joiner.join(f'bibtex_cite_key {operator} "{key}"' for key in keys)