fabricatio 0.2.10.dev0__cp312-cp312-win_amd64.whl → 0.2.11__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/actions/article.py +55 -10
- fabricatio/actions/article_rag.py +297 -12
- fabricatio/actions/fs.py +25 -0
- fabricatio/actions/output.py +17 -3
- fabricatio/actions/rag.py +42 -20
- fabricatio/actions/rules.py +14 -3
- fabricatio/capabilities/extract.py +70 -0
- fabricatio/capabilities/rag.py +5 -2
- fabricatio/capabilities/rating.py +5 -2
- fabricatio/capabilities/task.py +16 -16
- fabricatio/config.py +9 -2
- fabricatio/decorators.py +43 -26
- fabricatio/fs/__init__.py +9 -2
- fabricatio/fs/readers.py +6 -10
- fabricatio/models/action.py +16 -11
- fabricatio/models/adv_kwargs_types.py +5 -12
- fabricatio/models/extra/aricle_rag.py +254 -0
- fabricatio/models/extra/article_base.py +56 -7
- fabricatio/models/extra/article_essence.py +8 -7
- fabricatio/models/extra/article_main.py +102 -6
- fabricatio/models/extra/problem.py +5 -1
- fabricatio/models/extra/rag.py +49 -23
- fabricatio/models/generic.py +43 -24
- fabricatio/models/kwargs_types.py +12 -3
- fabricatio/models/task.py +13 -1
- fabricatio/models/usages.py +10 -27
- fabricatio/parser.py +16 -12
- fabricatio/rust.cp312-win_amd64.pyd +0 -0
- fabricatio/rust.pyi +177 -63
- fabricatio/utils.py +50 -10
- fabricatio-0.2.11.data/scripts/tdown.exe +0 -0
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.11.dist-info}/METADATA +20 -12
- fabricatio-0.2.11.dist-info/RECORD +65 -0
- fabricatio-0.2.10.dev0.data/scripts/tdown.exe +0 -0
- fabricatio-0.2.10.dev0.dist-info/RECORD +0 -62
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.11.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.11.dist-info}/licenses/LICENSE +0 -0
fabricatio/models/extra/aricle_rag.py (new file)
@@ -0,0 +1,254 @@
+"""A Module containing the article rag models."""
+
+import re
+from pathlib import Path
+from typing import ClassVar, Dict, List, Optional, Self, Unpack
+
+from fabricatio.fs import safe_text_read
+from fabricatio.journal import logger
+from fabricatio.models.extra.rag import MilvusDataBase
+from fabricatio.models.generic import AsPrompt
+from fabricatio.models.kwargs_types import ChunkKwargs
+from fabricatio.rust import BibManager, blake3_hash, is_chinese, split_into_chunks
+from fabricatio.utils import ok
+from more_itertools.recipes import flatten, unique
+from pydantic import Field
+
+
+class ArticleChunk(MilvusDataBase, AsPrompt):
+    """The chunk of an article."""
+
+    etc_word: ClassVar[str] = "等"
+    and_word: ClassVar[str] = "与"
+    _cite_number: Optional[int] = None
+
+    head_split: ClassVar[List[str]] = [
+        "引 言",
+        "引言",
+        "绪 论",
+        "绪论",
+        "前言",
+        "INTRODUCTION",
+        "Introduction",
+    ]
+    tail_split: ClassVar[List[str]] = [
+        "参 考 文 献",
+        "参 考 文 献",
+        "参考文献",
+        "REFERENCES",
+        "References",
+        "Bibliography",
+        "Reference",
+    ]
+    chunk: str
+    """The segment of the article"""
+    year: int
+    """The year of the article"""
+    authors: List[str] = Field(default_factory=list)
+    """The authors of the article"""
+    article_title: str
+    """The title of the article"""
+    bibtex_cite_key: str
+    """The bibtex cite key of the article"""
+
+    def _as_prompt_inner(self) -> Dict[str, str]:
+        return {
+            f"[[{ok(self._cite_number, 'You need to update cite number first.')}]] reference `{self.article_title}` from {self.as_auther_seq()}": self.chunk
+        }
+
+    @property
+    def cite_number(self) -> int:
+        """Get the cite number."""
+        return ok(self._cite_number, "cite number not set")
+
+    def _prepare_vectorization_inner(self) -> str:
+        return self.chunk
+
+    @classmethod
+    def from_file[P: str | Path](
+        cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
+    ) -> List[Self]:
+        """Load the article chunks from the file."""
+        if isinstance(path, list):
+            result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
+            logger.debug(f"Number of chunks created from list of files: {len(result)}")
+            return result
+
+        return cls._from_file_inner(path, bib_mgr, **kwargs)
+
+    @classmethod
+    def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
+        path = Path(path)
+
+        title_seg = path.stem.split(" - ").pop()
+
+        key = (
+            bib_mgr.get_cite_key_by_title(title_seg)
+            or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
+            or bib_mgr.get_cite_key_fuzzy(path.stem)
+        )
+        if key is None:
+            logger.warning(f"no cite key found for {path.as_posix()}, skip.")
+            return []
+        authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
+        year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
+        article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
+
+        result = [
+            cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
+            for c in split_into_chunks(cls.purge_numeric_citation(cls.strip(safe_text_read(path))), **kwargs)
+        ]
+
+        logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
+        return result
+
+    @classmethod
+    def strip(cls, string: str) -> str:
+        """Strip the head and tail of the string."""
+        logger.debug(f"String length before strip: {(original := len(string))}")
+        for split in (s for s in cls.head_split if s in string):
+            logger.debug(f"Strip head using {split}")
+            parts = string.split(split)
+            string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
+            break
+        logger.debug(
+            f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
+        )
+        if not d:
+            logger.warning("No decrease at head strip, which is might be abnormal.")
+        for split in (s for s in cls.tail_split if s in string):
+            logger.debug(f"Strip tail using {split}")
+            parts = string.split(split)
+            string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
+            break
+        logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
+        if not d:
+            logger.warning("No decrease at tail strip, which is might be abnormal.")
+
+        return string
+
+    def as_typst_cite(self) -> str:
+        """As typst cite."""
+        return f"#cite(<{self.bibtex_cite_key}>)"
+
+    @staticmethod
+    def purge_numeric_citation(string: str) -> str:
+        """Purge numeric citation."""
+        import re
+
+        return re.sub(r"\[[\d\s,\\~–-]+]", "", string)
+
+    @property
+    def auther_firstnames(self) -> List[str]:
+        """Get the first name of the authors."""
+        ret = []
+        for n in self.authors:
+            if is_chinese(n):
+                ret.append(n[0])
+            else:
+                ret.append(n.split()[-1])
+        return ret
+
+    def as_auther_seq(self) -> str:
+        """Get the auther sequence."""
+        match len(self.authors):
+            case 0:
+                raise ValueError("No authors found")
+            case 1:
+                return f"({self.auther_firstnames[0]},{self.year}){self.as_typst_cite()}"
+            case 2:
+                return f"({self.auther_firstnames[0]}{self.and_word}{self.auther_firstnames[1]},{self.year}){self.as_typst_cite()}"
+            case 3:
+                return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]},{self.year}){self.as_typst_cite()}"
+            case _:
+                return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]}{self.etc_word},{self.year}){self.as_typst_cite()}"
+
+    def update_cite_number(self, cite_number: int) -> Self:
+        """Update the cite number."""
+        self._cite_number = cite_number
+        return self
+
+
+class CitationManager(AsPrompt):
+    """Citation manager."""
+
+    article_chunks: List[ArticleChunk] = Field(default_factory=list)
+    """Article chunks."""
+
+    pat: str = r"(\[\[([\d\s,-]*)]])"
+    """Regex pattern to match citations."""
+    sep: str = ","
+    """Separator for citation numbers."""
+    abbr_sep: str = "-"
+    """Separator for abbreviated citation numbers."""
+
+    def update_chunks(
+        self, article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True
+    ) -> Self:
+        """Update article chunks."""
+        self.article_chunks.clear()
+        self.article_chunks.extend(article_chunks)
+        if dedup:
+            self.article_chunks = list(unique(self.article_chunks, lambda c: blake3_hash(c.chunk.encode())))
+        if set_cite_number:
+            self.set_cite_number_all()
+        return self
+
+    def empty(self) -> Self:
+        """Empty the article chunks."""
+        self.article_chunks.clear()
+        return self
+
+    def add_chunks(self, article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True) -> Self:
+        """Add article chunks."""
+        self.article_chunks.extend(article_chunks)
+        if dedup:
+            self.article_chunks = list(unique(self.article_chunks, lambda c: blake3_hash(c.chunk.encode())))
+        if set_cite_number:
+            self.set_cite_number_all()
+        return self
+
+    def set_cite_number_all(self) -> Self:
+        """Set citation numbers for all article chunks."""
+        for i, a in enumerate(self.article_chunks, 1):
+            a.update_cite_number(i)
+        return self
+
+    def _as_prompt_inner(self) -> Dict[str, str]:
+        """Generate prompt inner representation."""
+        return {"References": "\n".join(r.as_prompt() for r in self.article_chunks)}
+
+    def apply(self, string: str) -> str:
+        """Apply citation replacements to the input string."""
+        for origin, m in re.findall(self.pat, string):
+            logger.info(f"Matching citation: {m}")
+            notations = self.convert_to_numeric_notations(m)
+            logger.info(f"Citing Notations: {notations}")
+            citation_number_seq = list(flatten(self.decode_expr(n) for n in notations))
+            logger.info(f"Citation Number Sequence: {citation_number_seq}")
+            dedup = self.deduplicate_citation(citation_number_seq)
+            logger.info(f"Deduplicated Citation Number Sequence: {dedup}")
+            string = string.replace(origin, self.unpack_cite_seq(dedup))
+        return string
+
+    def decode_expr(self, string: str) -> List[int]:
+        """Decode citation expression into a list of integers."""
+        if self.abbr_sep in string:
+            start, end = string.split(self.abbr_sep)
+            return list(range(int(start), int(end) + 1))
+        return [int(string)]
+
+    def convert_to_numeric_notations(self, string: str) -> List[str]:
+        """Convert citation string into numeric notations."""
+        return [s.strip() for s in string.split(self.sep)]
+
+    def deduplicate_citation(self, citation_seq: List[int]) -> List[int]:
+        """Deduplicate citation sequence."""
+        chunk_seq = [a for a in self.article_chunks if a.cite_number in citation_seq]
+        deduped = unique(chunk_seq, lambda a: a.bibtex_cite_key)
+        return [a.cite_number for a in deduped]
+
+    def unpack_cite_seq(self, citation_seq: List[int]) -> str:
+        """Unpack citation sequence into a string."""
+        chunk_seq = [a for a in self.article_chunks if a.cite_number in citation_seq]
+        return "".join(a.as_typst_cite() for a in chunk_seq)
fabricatio/models/extra/article_base.py
@@ -18,6 +18,7 @@ from fabricatio.models.generic import (
     Titled,
     WordCount,
 )
+from fabricatio.rust import comment
 from pydantic import Field


@@ -29,11 +30,9 @@ class ReferringType(StrEnum):
     SUBSECTION = "subsection"


-
 type RefKey = Tuple[str, Optional[str], Optional[str]]


-
 class ArticleMetaData(SketchedAble, Described, WordCount, Titled, Language):
     """Metadata for an article component."""

@@ -47,7 +46,16 @@ class ArticleMetaData(SketchedAble, Described, WordCount, Titled, Language):
     aims: List[str]
     """List of writing aims of the research component in academic style."""

-
+    @property
+    def typst_metadata_comment(self) -> str:
+        """Generates a comment for the metadata of the article component."""
+        return (
+            (comment(f"Desc:\n{self.description}\n") if self.description else "")
+            + (comment(f"Aims:\n{'\n '.join(self.aims)}\n") if self.aims else "")
+            + (comment(f"Expected Word Count:{self.expected_word_count}") if self.expected_word_count else "")
+            if self.expected_word_count
+            else ""
+        )


 class ArticleOutlineBase(
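
The typst_metadata_comment property added above folds a node's description, aims, and word budget into a comment so the outline metadata travels with the generated Typst heading. A rough standalone sketch of the string it assembles; `comment` here is a stand-in that renders each line as a Typst `//` comment, whereas the real helper comes from fabricatio.rust and may format differently, and all the values are hypothetical:

def comment(text: str) -> str:
    # Stand-in only: render each line as a Typst line comment.
    return "".join(f"// {line}\n" for line in text.splitlines())

description = "Surveys retrieval-augmented generation pipelines."
aims = ["Motivate the citation manager", "Define the evaluation protocol"]
expected_word_count = 800

aims_block = "\n ".join(aims)
metadata = (
    (comment(f"Desc:\n{description}\n") if description else "")
    + (comment(f"Aims:\n{aims_block}\n") if aims else "")
    + (comment(f"Expected Word Count:{expected_word_count}") if expected_word_count else "")
    if expected_word_count
    else ""
)
print(metadata)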
@@ -92,7 +100,7 @@ class SubSectionBase(ArticleOutlineBase):

     def to_typst_code(self) -> str:
         """Converts the component into a Typst code snippet for rendering."""
-        return f"=== {self.title}\n"
+        return f"=== {self.title}\n{self.typst_metadata_comment}\n"

     def introspect(self) -> str:
         """Introspects the article subsection outline."""
@@ -117,7 +125,9 @@ class SectionBase[T: SubSectionBase](ArticleOutlineBase):
         Returns:
             str: The formatted Typst code snippet.
         """
-        return f"== {self.title}\n" + "\n\n".join(subsec.to_typst_code() for subsec in self.subsections)
+        return f"== {self.title}\n{self.typst_metadata_comment}\n" + "\n\n".join(
+            subsec.to_typst_code() for subsec in self.subsections
+        )

     def resolve_update_conflict(self, other: Self) -> str:
         """Resolve update errors in the article outline."""
@@ -160,7 +170,9 @@ class ChapterBase[T: SectionBase](ArticleOutlineBase):

     def to_typst_code(self) -> str:
         """Converts the chapter into a Typst formatted code snippet for rendering."""
-        return f"= {self.title}\n" + "\n\n".join(sec.to_typst_code() for sec in self.sections)
+        return f"= {self.title}\n{self.typst_metadata_comment}\n" + "\n\n".join(
+            sec.to_typst_code() for sec in self.sections
+        )

     def resolve_update_conflict(self, other: Self) -> str:
         """Resolve update errors in the article outline."""
@@ -302,4 +314,41 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, WordCount, Descri
         === Implementation Details
         == Evaluation Protocol
         """
-        return
+        return (
+            comment(
+                f"Title:{self.title}\n"
+                + (f"Desc:\n{self.description}\n" if self.description else "")
+                + f"Word Count:{self.expected_word_count}"
+                if self.expected_word_count
+                else ""
+            )
+            + "\n\n"
+            + "\n\n".join(a.to_typst_code() for a in self.chapters)
+        )
+
+    def avg_chap_wordcount[S](self: S) -> S:
+        """Set all chap have same word count sum up to be `self.expected_word_count`."""
+        avg = int(self.expected_word_count / len(self.chapters))
+        for c in self.chapters:
+            c.expected_word_count = avg
+        return self
+
+    def avg_sec_wordcount[S](self: S) -> S:
+        """Set all sec have same word count sum up to be `self.expected_word_count`."""
+        for c in self.chapters:
+            avg = int(c.expected_word_count / len(c.sections))
+            for s in c.sections:
+                s.expected_word_count = avg
+        return self
+
+    def avg_subsec_wordcount[S](self: S) -> S:
+        """Set all subsec have same word count sum up to be `self.expected_word_count`."""
+        for _, s in self.iter_sections():
+            avg = int(s.expected_word_count / len(s.subsections))
+            for ss in s.subsections:
+                ss.expected_word_count = avg
+        return self
+
+    def avg_wordcount_recursive(self) -> Self:
+        """Set all chap, sec, subsec have same word count sum up to be `self.expected_word_count`."""
+        return self.avg_chap_wordcount().avg_sec_wordcount().avg_sec_wordcount()
fabricatio/models/extra/article_essence.py
@@ -1,8 +1,9 @@
 """ArticleEssence: Semantic fingerprint of academic paper for structured analysis."""

-from typing import List
+from typing import List

-from fabricatio.models.
+from fabricatio.models.extra.rag import MilvusDataBase
+from fabricatio.models.generic import PersistentAble, SketchedAble
 from pydantic import BaseModel


@@ -54,7 +55,7 @@ class Highlightings(BaseModel):
     """


-class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
+class ArticleEssence(SketchedAble, PersistentAble, MilvusDataBase):
     """Structured representation of a scientific article's core elements in its original language."""

     language: str
@@ -93,7 +94,7 @@ class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
     bibtex_cite_key: str
     """Bibtex cite key of the original article."""

-    def
-
-
-
+    def _prepare_vectorization_inner(self) -> str:
+        return self.compact()
+
+
fabricatio/models/extra/article_main.py
@@ -2,6 +2,7 @@

 from typing import Dict, Generator, List, Self, Tuple, override

+from fabricatio.decorators import precheck_package
 from fabricatio.fs.readers import extract_sections
 from fabricatio.journal import logger
 from fabricatio.models.extra.article_base import (
@@ -11,11 +12,15 @@ from fabricatio.models.extra.article_base import (
     SubSectionBase,
 )
 from fabricatio.models.extra.article_outline import (
+    ArticleChapterOutline,
     ArticleOutline,
+    ArticleSectionOutline,
+    ArticleSubsectionOutline,
 )
 from fabricatio.models.generic import Described, PersistentAble, SequencePatch, SketchedAble, WithRef, WordCount
-from fabricatio.rust import word_count
-from
+from fabricatio.rust import convert_all_block_tex, convert_all_inline_tex, word_count
+from fabricatio.utils import fallback_kwargs
+from pydantic import Field, NonNegativeInt

 PARAGRAPH_SEP = "// - - -"

@@ -23,6 +28,9 @@ PARAGRAPH_SEP = "// - - -"
 class Paragraph(SketchedAble, WordCount, Described):
     """Structured academic paragraph blueprint for controlled content generation."""

+    expected_word_count: NonNegativeInt = 0
+    """The expected word count of this paragraph, 0 means not specified"""
+
     description: str = Field(
         alias="elaboration",
         description=Described.model_fields["description"].description,
@@ -85,7 +93,7 @@ class ArticleSubsection(SubSectionBase):
         Returns:
             str: Typst code snippet for rendering.
         """
-        return
+        return super().to_typst_code() + f"\n\n{PARAGRAPH_SEP}\n\n".join(p.content for p in self.paragraphs)

     @classmethod
     def from_typst_code(cls, title: str, body: str) -> Self:
@@ -153,10 +161,74 @@ class Article(
             "Original Article": self.display(),
         }

+    def convert_tex(self) -> Self:
+        """Convert tex to typst code."""
+        for _, _, subsec in self.iter_subsections():
+            for p in subsec.paragraphs:
+                p.content = convert_all_inline_tex(p.content)
+                p.content = convert_all_block_tex(p.content)
+        return self
+
+    def fix_wrapper(self) -> Self:
+        """Fix wrapper."""
+        for _, _, subsec in self.iter_subsections():
+            for p in subsec.paragraphs:
+                p.content = (
+                    p.content.replace(r" \( ", "$")
+                    .replace(r" \) ", "$")
+                    .replace("\\[\n", "$$\n")
+                    .replace("\n\\]", "\n$$")
+                )
+        return self
+
     @override
     def iter_subsections(self) -> Generator[Tuple[ArticleChapter, ArticleSection, ArticleSubsection], None, None]:
         return super().iter_subsections()  # pyright: ignore [reportReturnType]

+    def extrac_outline(self) -> ArticleOutline:
+        """Extract outline from article."""
+        # Create an empty list to hold chapter outlines
+        chapters = []
+
+        # Iterate through each chapter in the article
+        for chapter in self.chapters:
+            # Create an empty list to hold section outlines
+            sections = []
+
+            # Iterate through each section in the chapter
+            for section in chapter.sections:
+                # Create an empty list to hold subsection outlines
+                subsections = []
+
+                # Iterate through each subsection in the section
+                for subsection in section.subsections:
+                    # Create a subsection outline and add it to the list
+                    subsections.append(
+                        ArticleSubsectionOutline(**subsection.model_dump(exclude={"paragraphs"}, by_alias=True))
+                    )
+
+                # Create a section outline and add it to the list
+                sections.append(
+                    ArticleSectionOutline(
+                        **section.model_dump(exclude={"subsections"}, by_alias=True),
+                        subsections=subsections,
+                    )
+                )
+
+            # Create a chapter outline and add it to the list
+            chapters.append(
+                ArticleChapterOutline(
+                    **chapter.model_dump(exclude={"sections"}, by_alias=True),
+                    sections=sections,
+                )
+            )
+
+        # Create and return the article outline
+        return ArticleOutline(
+            **self.model_dump(exclude={"chapters"}, by_alias=True),
+            chapters=chapters,
+        )
+
     @classmethod
     def from_outline(cls, outline: ArticleOutline) -> "Article":
         """Generates an article from the given outline.
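
The fix_wrapper method added above rewrites LaTeX-style math delimiters that survive generation into Typst's $ and $$ form. The same replacement chain run on a throwaway string (illustration only; note that the inline rule also consumes the single spaces around \( and \)):

content = "The loss \\( L \\) is minimized.\n\\[\nE = m c^2\n\\]"
fixed = (
    content.replace(r" \( ", "$")
    .replace(r" \) ", "$")
    .replace("\\[\n", "$$\n")
    .replace("\n\\]", "\n$$")
)
print(fixed)
# The loss$L$is minimized.
# $$
# E = m c^2
# $$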
@@ -194,13 +266,37 @@
         return article

     @classmethod
-    def from_typst_code(cls, title: str, body: str) -> Self:
+    def from_typst_code(cls, title: str, body: str, **kwargs) -> Self:
         """Generates an article from the given Typst code."""
         return cls(
             chapters=[
                 ArticleChapter.from_typst_code(*pack) for pack in extract_sections(body, level=1, section_char="=")
             ],
             heading=title,
-
-
+            **fallback_kwargs(
+                kwargs,
+                expected_word_count=word_count(body),
+                abstract="",
+            ),
         )
+
+    @classmethod
+    def from_mixed_source(cls, article_outline: ArticleOutline, typst_code: str) -> Self:
+        """Generates an article from the given outline and Typst code."""
+        self = cls.from_typst_code(article_outline.title, typst_code)
+        self.expected_word_count = article_outline.expected_word_count
+        self.description = article_outline.description
+        for a, o in zip(self.iter_dfs(), article_outline.iter_dfs(), strict=True):
+            a.update_metadata(o)
+        return self.update_ref(article_outline)
+
+    @precheck_package(
+        "questionary", "'questionary' is required to run this function. Have you installed `fabricatio[qa]`?."
+    )
+    async def edit_titles(self) -> Self:
+        """Edits the titles of the article."""
+        from questionary import text
+
+        for a in self.iter_dfs():
+            a.title = await text(f"Edit `{a.title}`.", default=a.title).ask_async() or a.title
+        return self
fabricatio/models/extra/problem.py
@@ -7,7 +7,6 @@ from fabricatio.journal import logger
 from fabricatio.models.generic import SketchedAble, WithBriefing
 from fabricatio.utils import ask_edit
 from pydantic import Field
-from questionary import Choice, checkbox, text
 from rich import print as r_print


@@ -74,6 +73,9 @@ class ProblemSolutions(SketchedAble):
         return len(self.solutions) > 0

     async def edit_problem(self) -> Self:
+        """Interactively edit the problem description."""
+        from questionary import text
+
         """Interactively edit the problem description."""
         self.problem = Problem.model_validate_strings(
             await text("Please edit the problem below:", default=self.problem.display()).ask_async()
@@ -127,6 +129,8 @@ class Improvement(SketchedAble):
         Returns:
             Self: The current instance with filtered problems and solutions.
         """
+        from questionary import Choice, checkbox
+
         # Choose the problems to retain
         chosen_ones: List[ProblemSolutions] = await checkbox(
             "Please choose the problems you want to retain.(Default: retain all)",
|