fabricatio 0.3.15.dev4__cp312-cp312-win_amd64.whl → 0.4.4__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. fabricatio/__init__.py +7 -8
  2. fabricatio/actions/__init__.py +69 -1
  3. fabricatio/capabilities/__init__.py +59 -1
  4. fabricatio/models/__init__.py +47 -0
  5. fabricatio/rust.cp312-win_amd64.pyd +0 -0
  6. fabricatio/toolboxes/__init__.py +2 -1
  7. fabricatio/toolboxes/arithmetic.py +1 -1
  8. fabricatio/toolboxes/fs.py +2 -2
  9. fabricatio/workflows/__init__.py +9 -0
  10. fabricatio-0.4.4.data/scripts/tdown.exe +0 -0
  11. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.4.dist-info}/METADATA +49 -25
  12. fabricatio-0.4.4.dist-info/RECORD +15 -0
  13. fabricatio/actions/article.py +0 -415
  14. fabricatio/actions/article_rag.py +0 -407
  15. fabricatio/actions/fs.py +0 -25
  16. fabricatio/actions/output.py +0 -248
  17. fabricatio/actions/rag.py +0 -96
  18. fabricatio/actions/rules.py +0 -83
  19. fabricatio/capabilities/advanced_judge.py +0 -20
  20. fabricatio/capabilities/advanced_rag.py +0 -61
  21. fabricatio/capabilities/censor.py +0 -105
  22. fabricatio/capabilities/check.py +0 -212
  23. fabricatio/capabilities/correct.py +0 -228
  24. fabricatio/capabilities/extract.py +0 -74
  25. fabricatio/capabilities/persist.py +0 -103
  26. fabricatio/capabilities/propose.py +0 -65
  27. fabricatio/capabilities/rag.py +0 -264
  28. fabricatio/capabilities/rating.py +0 -404
  29. fabricatio/capabilities/review.py +0 -114
  30. fabricatio/capabilities/task.py +0 -113
  31. fabricatio/decorators.py +0 -253
  32. fabricatio/emitter.py +0 -177
  33. fabricatio/fs/__init__.py +0 -35
  34. fabricatio/fs/curd.py +0 -153
  35. fabricatio/fs/readers.py +0 -61
  36. fabricatio/journal.py +0 -12
  37. fabricatio/models/action.py +0 -263
  38. fabricatio/models/adv_kwargs_types.py +0 -63
  39. fabricatio/models/extra/__init__.py +0 -1
  40. fabricatio/models/extra/advanced_judge.py +0 -32
  41. fabricatio/models/extra/aricle_rag.py +0 -286
  42. fabricatio/models/extra/article_base.py +0 -486
  43. fabricatio/models/extra/article_essence.py +0 -101
  44. fabricatio/models/extra/article_main.py +0 -286
  45. fabricatio/models/extra/article_outline.py +0 -46
  46. fabricatio/models/extra/article_proposal.py +0 -52
  47. fabricatio/models/extra/patches.py +0 -20
  48. fabricatio/models/extra/problem.py +0 -165
  49. fabricatio/models/extra/rag.py +0 -98
  50. fabricatio/models/extra/rule.py +0 -52
  51. fabricatio/models/generic.py +0 -812
  52. fabricatio/models/kwargs_types.py +0 -121
  53. fabricatio/models/role.py +0 -99
  54. fabricatio/models/task.py +0 -310
  55. fabricatio/models/tool.py +0 -328
  56. fabricatio/models/usages.py +0 -791
  57. fabricatio/parser.py +0 -114
  58. fabricatio/rust.pyi +0 -846
  59. fabricatio/utils.py +0 -156
  60. fabricatio/workflows/articles.py +0 -24
  61. fabricatio/workflows/rag.py +0 -11
  62. fabricatio-0.3.15.dev4.data/scripts/tdown.exe +0 -0
  63. fabricatio-0.3.15.dev4.data/scripts/ttm.exe +0 -0
  64. fabricatio-0.3.15.dev4.dist-info/RECORD +0 -64
  65. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.4.dist-info}/WHEEL +0 -0
  66. {fabricatio-0.3.15.dev4.dist-info → fabricatio-0.4.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,286 +0,0 @@
1
- """A Module containing the article rag models."""
2
-
3
- import re
4
- from dataclasses import dataclass, field
5
- from itertools import groupby
6
- from pathlib import Path
7
- from typing import ClassVar, Dict, List, Optional, Self, Unpack
8
-
9
- from fabricatio.fs import safe_text_read
10
- from fabricatio.journal import logger
11
- from fabricatio.models.extra.rag import MilvusDataBase
12
- from fabricatio.models.generic import AsPrompt
13
- from fabricatio.models.kwargs_types import ChunkKwargs
14
- from fabricatio.rust import BibManager, blake3_hash, split_into_chunks
15
- from fabricatio.utils import ok, wrapp_in_block
16
- from more_itertools.more import first
17
- from more_itertools.recipes import flatten, unique
18
- from pydantic import Field
19
-
20
-
21
- class ArticleChunk(MilvusDataBase):
22
- """The chunk of an article."""
23
-
24
- etc_word: ClassVar[str] = "等"
25
- and_word: ClassVar[str] = "与"
26
- _cite_number: Optional[int] = None
27
-
28
- head_split: ClassVar[List[str]] = [
29
- "引 言",
30
- "引言",
31
- "绪 论",
32
- "绪论",
33
- "前言",
34
- "INTRODUCTION",
35
- "Introduction",
36
- ]
37
- tail_split: ClassVar[List[str]] = [
38
- "参 考 文 献",
39
- "参 考 文 献",
40
- "参考文献",
41
- "REFERENCES",
42
- "References",
43
- "Bibliography",
44
- "Reference",
45
- ]
46
- chunk: str
47
- """The segment of the article"""
48
- year: int
49
- """The year of the article"""
50
- authors: List[str] = Field(default_factory=list)
51
- """The authors of the article"""
52
- article_title: str
53
- """The title of the article"""
54
- bibtex_cite_key: str
55
- """The bibtex cite key of the article"""
56
-
57
- @property
58
- def reference_header(self) -> str:
59
- """Get the reference header."""
60
- return f"[[{ok(self._cite_number, 'You need to update cite number first.')}]] reference `{self.article_title}` from {self.as_auther_seq()}"
61
-
62
- @property
63
- def cite_number(self) -> int:
64
- """Get the cite number."""
65
- return ok(self._cite_number, "cite number not set")
66
-
67
- def _prepare_vectorization_inner(self) -> str:
68
- return self.chunk
69
-
70
- @classmethod
71
- def from_file[P: str | Path](
72
- cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
73
- ) -> List[Self]:
74
- """Load the article chunks from the file."""
75
- if isinstance(path, list):
76
- result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
77
- logger.debug(f"Number of chunks created from list of files: {len(result)}")
78
- return result
79
-
80
- return cls._from_file_inner(path, bib_mgr, **kwargs)
81
-
82
- @classmethod
83
- def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
84
- path = Path(path)
85
-
86
- title_seg = path.stem.split(" - ").pop()
87
-
88
- key = (
89
- bib_mgr.get_cite_key_by_title(title_seg)
90
- or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
91
- or bib_mgr.get_cite_key_fuzzy(path.stem)
92
- )
93
- if key is None:
94
- logger.warning(f"no cite key found for {path.as_posix()}, skip.")
95
- return []
96
- authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
97
- year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
98
- article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
99
-
100
- result = [
101
- cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
102
- for c in split_into_chunks(cls.purge_numeric_citation(cls.strip(safe_text_read(path))), **kwargs)
103
- ]
104
-
105
- logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
106
- return result
107
-
108
- @classmethod
109
- def strip(cls, string: str) -> str:
110
- """Strip the head and tail of the string."""
111
- logger.debug(f"String length before strip: {(original := len(string))}")
112
- for split in (s for s in cls.head_split if s in string):
113
- logger.debug(f"Strip head using {split}")
114
- parts = string.split(split)
115
- string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
116
- break
117
- logger.debug(
118
- f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
119
- )
120
- if not d:
121
- logger.warning("No decrease at head strip, which is might be abnormal.")
122
- for split in (s for s in cls.tail_split if s in string):
123
- logger.debug(f"Strip tail using {split}")
124
- parts = string.split(split)
125
- string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
126
- break
127
- logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
128
- if not d:
129
- logger.warning("No decrease at tail strip, which is might be abnormal.")
130
-
131
- return string
132
-
133
- def as_typst_cite(self) -> str:
134
- """As typst cite."""
135
- return f"#cite(<{self.bibtex_cite_key}>)"
136
-
137
- @staticmethod
138
- def purge_numeric_citation(string: str) -> str:
139
- """Purge numeric citation."""
140
- import re
141
-
142
- return re.sub(r"\[[\d\s,\\~–-]+]", "", string)
143
-
144
- @property
145
- def auther_lastnames(self) -> List[str]:
146
- """Get the last name of the authors."""
147
- return [n.split()[-1] for n in self.authors]
148
-
149
- def as_auther_seq(self) -> str:
150
- """Get the auther sequence."""
151
- match len(self.authors):
152
- case 0:
153
- raise ValueError("No authors found")
154
- case 1:
155
- return f"({self.auther_lastnames[0]},{self.year}){self.as_typst_cite()}"
156
- case 2:
157
- return f"({self.auther_lastnames[0]}{self.and_word}{self.auther_lastnames[1]},{self.year}){self.as_typst_cite()}"
158
- case 3:
159
- return f"({self.auther_lastnames[0]},{self.auther_lastnames[1]}{self.and_word}{self.auther_lastnames[2]},{self.year}){self.as_typst_cite()}"
160
- case _:
161
- return f"({self.auther_lastnames[0]},{self.auther_lastnames[1]}{self.and_word}{self.auther_lastnames[2]}{self.etc_word},{self.year}){self.as_typst_cite()}"
162
-
163
- def update_cite_number(self, cite_number: int) -> Self:
164
- """Update the cite number."""
165
- self._cite_number = cite_number
166
- return self
167
-
168
-
169
@dataclass
class CitationManager(AsPrompt):
    """Numbers article chunks and rewrites `[[n]]` markers into Typst citations.

    Every distinct `bibtex_cite_key` among the managed chunks gets one cite
    number, starting at 0. Text that cites those numbers as `[[1]]`,
    `[[1,3]]` or `[[2-4]]` can then be rewritten by `apply`.
    """

    article_chunks: List[ArticleChunk] = field(default_factory=list)
    """Article chunks."""

    pat: str = r"(\[\[([\d\s,-]*)]])"
    """Regex pattern to match citations."""
    sep: str = ","
    """Separator for citation numbers."""
    abbr_sep: str = "-"
    """Separator for abbreviated citation numbers."""

    def update_chunks(
        self, article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True
    ) -> Self:
        """Replace the managed chunks with `article_chunks`.

        Args:
            article_chunks: The new chunks.
            set_cite_number: Renumber all chunks afterwards.
            dedup: Drop chunks whose text hashes to the same value.

        Returns:
            This manager.
        """
        # Copy first: if the caller passes `self.article_chunks` itself, clearing
        # before copying would discard every chunk.
        incoming = list(article_chunks)
        self.article_chunks.clear()
        self.article_chunks.extend(incoming)
        return self._dedup_and_number(set_cite_number, dedup)

    def empty(self) -> Self:
        """Remove every managed chunk and return this manager."""
        self.article_chunks.clear()
        return self

    def add_chunks(self, article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True) -> Self:
        """Append `article_chunks` to the managed chunks.

        Args:
            article_chunks: The chunks to add.
            set_cite_number: Renumber all chunks afterwards.
            dedup: Drop chunks whose text hashes to the same value.

        Returns:
            This manager.
        """
        self.article_chunks.extend(article_chunks)
        return self._dedup_and_number(set_cite_number, dedup)

    def _dedup_and_number(self, set_cite_number: bool, dedup: bool) -> Self:
        """Apply the optional dedup and renumbering steps shared by the chunk mutators."""
        if dedup:
            # NOTE(review): more_itertools `unique` is documented as yielding in
            # sorted-by-key order, so this may also reorder the chunks — confirm
            # that is intended.
            self.article_chunks = list(unique(self.article_chunks, lambda c: blake3_hash(c.chunk.encode())))
        if set_cite_number:
            self.set_cite_number_all()
        return self

    def set_cite_number_all(self) -> Self:
        """Give every chunk the 0-based index of its cite key in first-seen order."""
        distinct_keys = dict.fromkeys(a.bibtex_cite_key for a in self.article_chunks)
        number_mapping = {k: i for i, k in enumerate(distinct_keys)}

        for a in self.article_chunks:
            a.update_cite_number(number_mapping[a.bibtex_cite_key])
        return self

    def _as_prompt_inner(self) -> Dict[str, str]:
        """Render one block per article containing all of that article's chunks.

        Chunks are grouped in a dict keyed by cite key rather than with
        `itertools.groupby`, which only merges adjacent items and would emit
        several blocks for one article when its chunks are not contiguous.
        """
        grouped: Dict[str, List[ArticleChunk]] = {}
        for a in self.article_chunks:
            grouped.setdefault(a.bibtex_cite_key, []).append(a)

        seg = []
        for k, g in grouped.items():
            logger.debug(f"Group [{k}]: {len(g)}")
            seg.append(wrapp_in_block("\n\n".join(a.chunk for a in g), g[0].reference_header))
        return {"References": "\n".join(seg)}

    def _resolve_citation(self, expr: str) -> List[int]:
        """Turn the inside of one `[[...]]` marker into deduplicated cite numbers."""
        logger.info(f"Matching citation: {expr}")
        notations = self.convert_to_numeric_notations(expr)
        logger.info(f"Citing Notations: {notations}")
        citation_number_seq = list(flatten(self.decode_expr(n) for n in notations))
        logger.info(f"Citation Number Sequence: {citation_number_seq}")
        dedup = self.deduplicate_citation(citation_number_seq)
        logger.info(f"Deduplicated Citation Number Sequence: {dedup}")
        return dedup

    def apply(self, string: str) -> str:
        """Replace every citation marker in `string` with Typst `#cite` calls.

        Raises:
            ValueError: If a marker contains a malformed number or range.
        """

        def _render(match: re.Match[str]) -> str:
            # Group 2 of `pat` is the text between the double brackets.
            return self.unpack_cite_seq(self._resolve_citation(match.group(2)))

        return re.sub(self.pat, _render, string)

    def citation_count(self, string: str) -> int:
        """Sum, over all markers in `string`, the distinct known articles each one cites."""
        return sum(len(self._resolve_citation(m)) for _, m in re.findall(self.pat, string))

    def citation_coverage(self, string: str) -> float:
        """Return `citation_count(string)` divided by the number of managed chunks.

        Returns 0.0 when no chunks are managed instead of dividing by zero.
        """
        if not self.article_chunks:
            return 0.0
        return self.citation_count(string) / len(self.article_chunks)

    def decode_expr(self, string: str) -> List[int]:
        """Decode one notation, either a number or an inclusive `start-end` range.

        Raises:
            ValueError: If the notation is not an integer, or holds more than
                one `abbr_sep`.
        """
        if self.abbr_sep in string:
            start, end = string.split(self.abbr_sep)
            return list(range(int(start), int(end) + 1))
        return [int(string)]

    def convert_to_numeric_notations(self, string: str) -> List[str]:
        """Split a marker's content on `sep` into stripped, non-empty notations.

        Empty pieces, as produced by `[[]]` or a trailing separator, are
        dropped so they never reach `decode_expr`.
        """
        return [token for s in string.split(self.sep) if (token := s.strip())]

    def deduplicate_citation(self, citation_seq: List[int]) -> List[int]:
        """Return one cite number per distinct article referenced by `citation_seq`.

        Numbers that belong to no managed chunk are dropped.
        """
        wanted = set(citation_seq)
        chunk_seq = [a for a in self.article_chunks if a.cite_number in wanted]
        deduped = unique(chunk_seq, lambda a: a.bibtex_cite_key)
        return [a.cite_number for a in deduped]

    def unpack_cite_seq(self, citation_seq: List[int]) -> str:
        """Concatenate the Typst citation of every article whose number is in `citation_seq`."""
        wanted = set(citation_seq)
        # Keying by cite key keeps a single chunk per article.
        chunk_seq = {a.bibtex_cite_key: a for a in self.article_chunks if a.cite_number in wanted}
        return "".join(a.as_typst_cite() for a in chunk_seq.values())

    def as_milvus_filter_expr(self, blacklist: bool = True) -> str:
        """Build a filter expression over `bibtex_cite_key` for the managed articles.

        Args:
            blacklist: When true, produce `!=` clauses joined by `and`
                (exclude these articles); otherwise `==` clauses joined by
                `or` (only these articles).

        Returns:
            The expression, with one clause per distinct cite key; an empty
            string when no chunks are managed.
        """
        keys = dict.fromkeys(a.bibtex_cite_key for a in self.article_chunks)
        if blacklist:
            return " and ".join(f'bibtex_cite_key != "{k}"' for k in keys)
        return " or ".join(f'bibtex_cite_key == "{k}"' for k in keys)