fabricatio 0.2.1.dev0__cp313-cp313-win_amd64.whl → 0.3.14.dev5__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. fabricatio/__init__.py +12 -20
  2. fabricatio/actions/__init__.py +1 -5
  3. fabricatio/actions/article.py +319 -0
  4. fabricatio/actions/article_rag.py +416 -0
  5. fabricatio/actions/fs.py +25 -0
  6. fabricatio/actions/output.py +248 -0
  7. fabricatio/actions/rag.py +96 -0
  8. fabricatio/actions/rules.py +83 -0
  9. fabricatio/capabilities/__init__.py +1 -0
  10. fabricatio/capabilities/advanced_judge.py +20 -0
  11. fabricatio/capabilities/advanced_rag.py +61 -0
  12. fabricatio/capabilities/censor.py +105 -0
  13. fabricatio/capabilities/check.py +212 -0
  14. fabricatio/capabilities/correct.py +228 -0
  15. fabricatio/capabilities/extract.py +74 -0
  16. fabricatio/capabilities/persist.py +103 -0
  17. fabricatio/capabilities/propose.py +65 -0
  18. fabricatio/capabilities/rag.py +263 -0
  19. fabricatio/capabilities/rating.py +404 -0
  20. fabricatio/capabilities/review.py +114 -0
  21. fabricatio/capabilities/task.py +113 -0
  22. fabricatio/decorators.py +251 -179
  23. fabricatio/{core.py → emitter.py} +31 -21
  24. fabricatio/fs/__init__.py +32 -2
  25. fabricatio/fs/curd.py +32 -9
  26. fabricatio/fs/readers.py +44 -7
  27. fabricatio/journal.py +3 -19
  28. fabricatio/models/action.py +185 -61
  29. fabricatio/models/adv_kwargs_types.py +63 -0
  30. fabricatio/models/extra/__init__.py +1 -0
  31. fabricatio/models/extra/advanced_judge.py +32 -0
  32. fabricatio/models/extra/aricle_rag.py +284 -0
  33. fabricatio/models/extra/article_base.py +422 -0
  34. fabricatio/models/extra/article_essence.py +101 -0
  35. fabricatio/models/extra/article_main.py +284 -0
  36. fabricatio/models/extra/article_outline.py +46 -0
  37. fabricatio/models/extra/article_proposal.py +52 -0
  38. fabricatio/models/extra/patches.py +20 -0
  39. fabricatio/models/extra/problem.py +165 -0
  40. fabricatio/models/extra/rag.py +98 -0
  41. fabricatio/models/extra/rule.py +52 -0
  42. fabricatio/models/generic.py +704 -36
  43. fabricatio/models/kwargs_types.py +112 -17
  44. fabricatio/models/role.py +74 -27
  45. fabricatio/models/task.py +94 -60
  46. fabricatio/models/tool.py +328 -188
  47. fabricatio/models/usages.py +791 -515
  48. fabricatio/parser.py +81 -60
  49. fabricatio/rust.cp313-win_amd64.pyd +0 -0
  50. fabricatio/rust.pyi +886 -0
  51. fabricatio/toolboxes/__init__.py +1 -3
  52. fabricatio/toolboxes/fs.py +17 -1
  53. fabricatio/utils.py +156 -0
  54. fabricatio/workflows/__init__.py +1 -0
  55. fabricatio/workflows/articles.py +24 -0
  56. fabricatio/workflows/rag.py +11 -0
  57. fabricatio-0.3.14.dev5.data/scripts/tdown.exe +0 -0
  58. fabricatio-0.3.14.dev5.data/scripts/ttm.exe +0 -0
  59. fabricatio-0.3.14.dev5.dist-info/METADATA +188 -0
  60. fabricatio-0.3.14.dev5.dist-info/RECORD +64 -0
  61. {fabricatio-0.2.1.dev0.dist-info → fabricatio-0.3.14.dev5.dist-info}/WHEEL +1 -1
  62. fabricatio/_rust.cp313-win_amd64.pyd +0 -0
  63. fabricatio/_rust.pyi +0 -53
  64. fabricatio/_rust_instances.py +0 -8
  65. fabricatio/actions/communication.py +0 -15
  66. fabricatio/actions/transmission.py +0 -23
  67. fabricatio/config.py +0 -263
  68. fabricatio/models/advanced.py +0 -128
  69. fabricatio/models/events.py +0 -82
  70. fabricatio/models/utils.py +0 -78
  71. fabricatio/toolboxes/task.py +0 -6
  72. fabricatio-0.2.1.dev0.data/scripts/tdown.exe +0 -0
  73. fabricatio-0.2.1.dev0.dist-info/METADATA +0 -420
  74. fabricatio-0.2.1.dev0.dist-info/RECORD +0 -35
  75. {fabricatio-0.2.1.dev0.dist-info → fabricatio-0.3.14.dev5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,63 @@
1
+ """A module containing kwargs types for content correction and checking operations."""
2
+
3
+ from importlib.util import find_spec
4
+ from typing import NotRequired, Optional, TypedDict
5
+
6
+ from fabricatio.models.extra.problem import Improvement
7
+ from fabricatio.models.extra.rule import RuleSet
8
+ from fabricatio.models.generic import SketchedAble
9
+ from fabricatio.models.kwargs_types import ReferencedKwargs
10
+
11
+
12
class CorrectKwargs[T: SketchedAble](ReferencedKwargs[T], total=False):
    """Arguments for content correction operations.

    Extends ``ReferencedKwargs[T]`` with the improvement used by a correction
    call. The class is declared with ``total=False``, so the key declared here
    is optional.
    """

    # Presumably the improvement that drives the correction — confirm against the caller.
    improvement: Improvement
20
+
21
+
22
class CheckKwargs(ReferencedKwargs[Improvement], total=False):
    """Arguments for content checking operations.

    Extends ``ReferencedKwargs[Improvement]`` with the rule set used by a check
    call. The class is declared with ``total=False``, so the key declared here
    is optional.
    """

    # Presumably the rules the content is checked against — confirm against the caller.
    ruleset: RuleSet
30
+
31
+
32
if find_spec("pymilvus"):
    # pymilvus is an optional dependency: the definitions below only exist when
    # the package can be found, because the annotations reference its types.
    from pymilvus import CollectionSchema
    from pymilvus.milvus_client import IndexParams

    class CollectionConfigKwargs(TypedDict, total=False):
        """Configuration parameters for a vector collection.

        These arguments are typically used when configuring connections to vector databases.
        Every key is optional (``total=False``).
        """

        dimension: int | None
        primary_field_name: str
        id_type: str
        vector_field_name: str
        metric_type: str
        timeout: float | None
        schema: CollectionSchema | None
        index_params: IndexParams | None

    # NOTE(review): FetchKwargs uses no pymilvus type but is still defined only
    # when pymilvus is installed — confirm that importers guard for this.
    class FetchKwargs(TypedDict):
        """Arguments for fetching data from vector collections.

        Controls how data is retrieved from vector databases, including filtering
        and result limiting parameters. Every key is marked ``NotRequired``.
        """

        collection_name: NotRequired[str | None]
        similarity_threshold: NotRequired[float]
        result_per_query: NotRequired[int]
        tei_endpoint: NotRequired[Optional[str]]
        reranker_threshold: NotRequired[float]
        filter_expr: NotRequired[str]
@@ -0,0 +1 @@
1
+ """A module containing extra models for fabricatio."""
@@ -0,0 +1,32 @@
1
+ """Module containing the JudgeMent class for holding judgment results."""
2
+
3
+ from typing import List
4
+
5
+ from fabricatio.models.generic import SketchedAble
6
+
7
+
8
class JudgeMent(SketchedAble):
    """Represents a judgment result containing supporting/denying evidence and final verdict.

    The class stores the evidence for and against the issue, along with the final boolean judgment.
    """

    # NOTE(review): the attribute docstrings below are left untouched; if
    # SketchedAble exposes them (e.g. in a generated schema), editing them would
    # change runtime output — confirm before rewording.
    issue_to_judge: str
    """The issue to be judged, including the original question and context"""

    deny_evidence: List[str]
    """List of clues supporting the denial."""

    affirm_evidence: List[str]
    """List of clues supporting the affirmation."""

    final_judgement: bool
    """The final judgment made according to all extracted clues. true for the `issue_to_judge` is correct and false for incorrect."""

    def __bool__(self) -> bool:
        """Return the final judgment value.

        Defining ``__bool__`` lets an instance be used directly in a truth test
        (``if judgement: ...``).

        Returns:
            bool: The stored final_judgement value indicating the judgment result.
        """
        return self.final_judgement
@@ -0,0 +1,284 @@
1
+ """A Module containing the article rag models."""
2
+
3
+ import re
4
+ from itertools import groupby
5
+ from pathlib import Path
6
+ from typing import ClassVar, Dict, List, Optional, Self, Unpack
7
+
8
+ from fabricatio.fs import safe_text_read
9
+ from fabricatio.journal import logger
10
+ from fabricatio.models.extra.rag import MilvusDataBase
11
+ from fabricatio.models.generic import AsPrompt
12
+ from fabricatio.models.kwargs_types import ChunkKwargs
13
+ from fabricatio.rust import BibManager, blake3_hash, split_into_chunks
14
+ from fabricatio.utils import ok, wrapp_in_block
15
+ from more_itertools.more import first
16
+ from more_itertools.recipes import flatten, unique
17
+ from pydantic import Field
18
+
19
+
20
+ class ArticleChunk(MilvusDataBase):
21
+ """The chunk of an article."""
22
+
23
+ etc_word: ClassVar[str] = "等"
24
+ and_word: ClassVar[str] = "与"
25
+ _cite_number: Optional[int] = None
26
+
27
+ head_split: ClassVar[List[str]] = [
28
+ "引 言",
29
+ "引言",
30
+ "绪 论",
31
+ "绪论",
32
+ "前言",
33
+ "INTRODUCTION",
34
+ "Introduction",
35
+ ]
36
+ tail_split: ClassVar[List[str]] = [
37
+ "参 考 文 献",
38
+ "参 考 文 献",
39
+ "参考文献",
40
+ "REFERENCES",
41
+ "References",
42
+ "Bibliography",
43
+ "Reference",
44
+ ]
45
+ chunk: str
46
+ """The segment of the article"""
47
+ year: int
48
+ """The year of the article"""
49
+ authors: List[str] = Field(default_factory=list)
50
+ """The authors of the article"""
51
+ article_title: str
52
+ """The title of the article"""
53
+ bibtex_cite_key: str
54
+ """The bibtex cite key of the article"""
55
+
56
+ @property
57
+ def reference_header(self) -> str:
58
+ """Get the reference header."""
59
+ return f"[[{ok(self._cite_number, 'You need to update cite number first.')}]] reference `{self.article_title}` from {self.as_auther_seq()}"
60
+
61
+ @property
62
+ def cite_number(self) -> int:
63
+ """Get the cite number."""
64
+ return ok(self._cite_number, "cite number not set")
65
+
66
+ def _prepare_vectorization_inner(self) -> str:
67
+ return self.chunk
68
+
69
+ @classmethod
70
+ def from_file[P: str | Path](
71
+ cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
72
+ ) -> List[Self]:
73
+ """Load the article chunks from the file."""
74
+ if isinstance(path, list):
75
+ result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
76
+ logger.debug(f"Number of chunks created from list of files: {len(result)}")
77
+ return result
78
+
79
+ return cls._from_file_inner(path, bib_mgr, **kwargs)
80
+
81
+ @classmethod
82
+ def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
83
+ path = Path(path)
84
+
85
+ title_seg = path.stem.split(" - ").pop()
86
+
87
+ key = (
88
+ bib_mgr.get_cite_key_by_title(title_seg)
89
+ or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
90
+ or bib_mgr.get_cite_key_fuzzy(path.stem)
91
+ )
92
+ if key is None:
93
+ logger.warning(f"no cite key found for {path.as_posix()}, skip.")
94
+ return []
95
+ authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
96
+ year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
97
+ article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
98
+
99
+ result = [
100
+ cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
101
+ for c in split_into_chunks(cls.purge_numeric_citation(cls.strip(safe_text_read(path))), **kwargs)
102
+ ]
103
+
104
+ logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
105
+ return result
106
+
107
+ @classmethod
108
+ def strip(cls, string: str) -> str:
109
+ """Strip the head and tail of the string."""
110
+ logger.debug(f"String length before strip: {(original := len(string))}")
111
+ for split in (s for s in cls.head_split if s in string):
112
+ logger.debug(f"Strip head using {split}")
113
+ parts = string.split(split)
114
+ string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
115
+ break
116
+ logger.debug(
117
+ f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
118
+ )
119
+ if not d:
120
+ logger.warning("No decrease at head strip, which is might be abnormal.")
121
+ for split in (s for s in cls.tail_split if s in string):
122
+ logger.debug(f"Strip tail using {split}")
123
+ parts = string.split(split)
124
+ string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
125
+ break
126
+ logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
127
+ if not d:
128
+ logger.warning("No decrease at tail strip, which is might be abnormal.")
129
+
130
+ return string
131
+
132
+ def as_typst_cite(self) -> str:
133
+ """As typst cite."""
134
+ return f"#cite(<{self.bibtex_cite_key}>)"
135
+
136
+ @staticmethod
137
+ def purge_numeric_citation(string: str) -> str:
138
+ """Purge numeric citation."""
139
+ import re
140
+
141
+ return re.sub(r"\[[\d\s,\\~–-]+]", "", string)
142
+
143
+ @property
144
+ def auther_lastnames(self) -> List[str]:
145
+ """Get the last name of the authors."""
146
+ return [n.split()[-1] for n in self.authors]
147
+
148
+ def as_auther_seq(self) -> str:
149
+ """Get the auther sequence."""
150
+ match len(self.authors):
151
+ case 0:
152
+ raise ValueError("No authors found")
153
+ case 1:
154
+ return f"({self.auther_lastnames[0]},{self.year}){self.as_typst_cite()}"
155
+ case 2:
156
+ return f"({self.auther_lastnames[0]}{self.and_word}{self.auther_lastnames[1]},{self.year}){self.as_typst_cite()}"
157
+ case 3:
158
+ return f"({self.auther_lastnames[0]},{self.auther_lastnames[1]}{self.and_word}{self.auther_lastnames[2]},{self.year}){self.as_typst_cite()}"
159
+ case _:
160
+ return f"({self.auther_lastnames[0]},{self.auther_lastnames[1]}{self.and_word}{self.auther_lastnames[2]}{self.etc_word},{self.year}){self.as_typst_cite()}"
161
+
162
+ def update_cite_number(self, cite_number: int) -> Self:
163
+ """Update the cite number."""
164
+ self._cite_number = cite_number
165
+ return self
166
+
167
+
168
class CitationManager(AsPrompt):
    """Citation manager."""

    article_chunks: List[ArticleChunk] = Field(default_factory=list)
    """Article chunks."""

    pat: str = r"(\[\[([\d\s,-]*)]])"
    """Regex pattern to match citations."""
    sep: str = ","
    """Separator for citation numbers."""
    abbr_sep: str = "-"
    """Separator for abbreviated citation numbers."""

    def update_chunks(
        self, article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True
    ) -> Self:
        """Update article chunks.

        Replaces the managed chunks with `article_chunks`, then optionally
        de-duplicates them and renumbers the citations.
        """
        # Snapshot first: the caller may pass `self.article_chunks` itself, which
        # the `clear()` below would otherwise empty before it is read.
        incoming = list(article_chunks)
        self.article_chunks.clear()
        self.article_chunks.extend(incoming)
        return self._refresh(set_cite_number, dedup)

    def empty(self) -> Self:
        """Empty the article chunks."""
        self.article_chunks.clear()
        return self

    def add_chunks(self, article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True) -> Self:
        """Add article chunks.

        Appends `article_chunks` to the managed chunks, then optionally
        de-duplicates them and renumbers the citations.
        """
        # Snapshot for the same aliasing reason as in `update_chunks`.
        self.article_chunks.extend(list(article_chunks))
        return self._refresh(set_cite_number, dedup)

    def _refresh(self, set_cite_number: bool, dedup: bool) -> Self:
        """Apply the optional de-duplication and renumbering shared by the chunk mutators."""
        if dedup:
            # Chunks with identical text (same blake3 hash) are collapsed to one.
            self.article_chunks = list(unique(self.article_chunks, lambda c: blake3_hash(c.chunk.encode())))
        if set_cite_number:
            self.set_cite_number_all()
        return self

    def set_cite_number_all(self) -> Self:
        """Set citation numbers for all article chunks.

        Each distinct bibtex cite key gets one number, assigned from 0 in order
        of first appearance; every chunk receives the number of its key.
        """
        number_mapping = {
            key: number for number, key in enumerate(dict.fromkeys(a.bibtex_cite_key for a in self.article_chunks))
        }
        for a in self.article_chunks:
            a.update_cite_number(number_mapping[a.bibtex_cite_key])
        return self

    def _as_prompt_inner(self) -> Dict[str, str]:
        """Generate prompt inner representation.

        Emits one block per bibtex cite key, in order of first appearance,
        containing all chunks of that key.
        """
        # Group with a dict rather than itertools.groupby: groupby only merges
        # adjacent items, and the chunks are not ordered by cite key, so one
        # reference would otherwise be emitted as several separate blocks.
        grouped: Dict[str, List[ArticleChunk]] = {}
        for a in self.article_chunks:
            grouped.setdefault(a.bibtex_cite_key, []).append(a)

        seg = []
        for k, g in grouped.items():
            logger.debug(f"Group [{k}]: {len(g)}")
            seg.append(wrapp_in_block("\n\n".join(a.chunk for a in g), g[0].reference_header))
        return {"References": "\n".join(seg)}

    def _resolve_citation(self, expr: str) -> List[int]:
        """Turn the inner text of one ``[[...]]`` citation into de-duplicated cite numbers."""
        logger.info(f"Matching citation: {expr}")
        notations = self.convert_to_numeric_notations(expr)
        logger.info(f"Citing Notations: {notations}")
        citation_number_seq = list(flatten(self.decode_expr(n) for n in notations))
        logger.info(f"Citation Number Sequence: {citation_number_seq}")
        dedup = self.deduplicate_citation(citation_number_seq)
        logger.info(f"Deduplicated Citation Number Sequence: {dedup}")
        return dedup

    def apply(self, string: str) -> str:
        """Apply citation replacements to the input string.

        Every ``[[...]]`` citation matched by `pat` is replaced with the typst
        citations of the referenced chunks.
        """
        for origin, m in re.findall(self.pat, string):
            string = string.replace(origin, self.unpack_cite_seq(self._resolve_citation(m)))
        return string

    def citation_count(self, string: str) -> int:
        """Get the citation count in the string.

        Sums, over every matched citation, the number of distinct references it
        resolves to.
        """
        return sum(len(self._resolve_citation(m)) for _, m in re.findall(self.pat, string))

    def citation_coverage(self, string: str) -> float:
        """Get the citation coverage in the string.

        Returns the citation count divided by the number of managed chunks, or
        ``0.0`` when no chunks are managed (instead of dividing by zero).
        """
        if not self.article_chunks:
            return 0.0
        return self.citation_count(string) / len(self.article_chunks)

    def decode_expr(self, string: str) -> List[int]:
        """Decode citation expression into a list of integers.

        ``"3"`` becomes ``[3]`` and ``"2-5"`` becomes ``[2, 3, 4, 5]``.

        Raises:
            ValueError: If the expression is not a number or a ``start-end`` pair.
        """
        if self.abbr_sep in string:
            start, end = string.split(self.abbr_sep)
            return list(range(int(start), int(end) + 1))
        return [int(string)]

    def convert_to_numeric_notations(self, string: str) -> List[str]:
        """Convert citation string into numeric notations.

        Splits on `sep` and trims whitespace. Empty entries (from ``[[]]`` or a
        stray separator as in ``[[1,]]``) are dropped so they never reach
        `decode_expr`, where ``int("")`` would raise.
        """
        return [token for part in string.split(self.sep) if (token := part.strip())]

    def deduplicate_citation(self, citation_seq: List[int]) -> List[int]:
        """Deduplicate citation sequence.

        Returns one cite number per distinct bibtex cite key among the managed
        chunks whose number occurs in `citation_seq`.
        """
        wanted = set(citation_seq)
        chunk_seq = [a for a in self.article_chunks if a.cite_number in wanted]
        deduped = unique(chunk_seq, lambda a: a.bibtex_cite_key)
        return [a.cite_number for a in deduped]

    def unpack_cite_seq(self, citation_seq: List[int]) -> str:
        """Unpack citation sequence into a string.

        Concatenates one typst citation per distinct bibtex cite key among the
        managed chunks whose number occurs in `citation_seq`.
        """
        wanted = set(citation_seq)
        chunk_seq = {a.bibtex_cite_key: a for a in self.article_chunks if a.cite_number in wanted}
        return "".join(a.as_typst_cite() for a in chunk_seq.values())

    def as_milvus_filter_expr(self, blacklist: bool = True) -> str:
        """Build a filter expression over the managed bibtex cite keys.

        Args:
            blacklist: When true, the expression excludes every managed cite key
                (``!=`` clauses joined by ``and``); otherwise it selects only
                the managed cite keys (``==`` clauses joined by ``or``).

        Returns:
            The filter expression, or an empty string when no chunks are managed.
        """
        # One clause per distinct key: repeating a clause for every chunk of the
        # same article only lengthens the expression without changing its meaning.
        keys = list(dict.fromkeys(a.bibtex_cite_key for a in self.article_chunks))
        if blacklist:
            return " and ".join(f'bibtex_cite_key != "{k}"' for k in keys)
        return " or ".join(f'bibtex_cite_key == "{k}"' for k in keys)