fabricatio 0.2.10.dev0__cp312-cp312-win_amd64.whl → 0.2.11.dev0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/actions/article.py +12 -2
- fabricatio/actions/article_rag.py +132 -11
- fabricatio/actions/fs.py +25 -0
- fabricatio/actions/output.py +17 -3
- fabricatio/actions/rag.py +42 -20
- fabricatio/actions/rules.py +14 -3
- fabricatio/capabilities/extract.py +65 -0
- fabricatio/capabilities/rag.py +5 -2
- fabricatio/capabilities/rating.py +5 -2
- fabricatio/capabilities/task.py +16 -16
- fabricatio/config.py +9 -2
- fabricatio/decorators.py +30 -30
- fabricatio/fs/__init__.py +9 -2
- fabricatio/fs/readers.py +6 -10
- fabricatio/models/adv_kwargs_types.py +5 -12
- fabricatio/models/extra/aricle_rag.py +235 -0
- fabricatio/models/extra/article_essence.py +8 -7
- fabricatio/models/extra/article_main.py +39 -1
- fabricatio/models/extra/problem.py +7 -3
- fabricatio/models/extra/rag.py +49 -23
- fabricatio/models/generic.py +58 -30
- fabricatio/models/kwargs_types.py +11 -2
- fabricatio/models/usages.py +9 -26
- fabricatio/parser.py +16 -12
- fabricatio/rust.cp312-win_amd64.pyd +0 -0
- fabricatio/rust.pyi +140 -12
- fabricatio/utils.py +23 -2
- fabricatio-0.2.11.dev0.data/scripts/tdown.exe +0 -0
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.11.dev0.dist-info}/METADATA +18 -12
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.11.dev0.dist-info}/RECORD +32 -29
- fabricatio-0.2.10.dev0.data/scripts/tdown.exe +0 -0
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.11.dev0.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.11.dev0.dist-info}/licenses/LICENSE +0 -0
fabricatio/decorators.py
CHANGED
@@ -8,14 +8,34 @@ from shutil import which
|
|
8
8
|
from types import ModuleType
|
9
9
|
from typing import Callable, List, Optional
|
10
10
|
|
11
|
-
from questionary import confirm
|
12
|
-
|
13
11
|
from fabricatio.config import configs
|
14
12
|
from fabricatio.journal import logger
|
15
13
|
|
16
14
|
|
15
|
+
def precheck_package[**P, R](package_name: str, msg: str) -> Callable[[Callable[P, R]], Callable[P, R]]:
|
16
|
+
"""Check if a package exists in the current environment.
|
17
|
+
|
18
|
+
Args:
|
19
|
+
package_name (str): The name of the package to check.
|
20
|
+
msg (str): The message to display if the package is not found.
|
21
|
+
|
22
|
+
Returns:
|
23
|
+
bool: True if the package exists, False otherwise.
|
24
|
+
"""
|
25
|
+
|
26
|
+
def _wrapper(func: Callable[P, R]) -> Callable[P, R]:
|
27
|
+
def _inner(*args: P.args, **kwargs: P.kwargs) -> R:
|
28
|
+
if find_spec(package_name):
|
29
|
+
return func(*args, **kwargs)
|
30
|
+
raise RuntimeError(msg)
|
31
|
+
|
32
|
+
return _inner
|
33
|
+
|
34
|
+
return _wrapper
|
35
|
+
|
36
|
+
|
17
37
|
def depend_on_external_cmd[**P, R](
|
18
|
-
|
38
|
+
bin_name: str, install_tip: Optional[str], homepage: Optional[str] = None
|
19
39
|
) -> Callable[[Callable[P, R]], Callable[P, R]]:
|
20
40
|
"""Decorator to check for the presence of an external command.
|
21
41
|
|
@@ -68,6 +88,8 @@ def logging_execution_info[**P, R](func: Callable[P, R]) -> Callable[P, R]:
|
|
68
88
|
return _wrapper
|
69
89
|
|
70
90
|
|
91
|
+
@precheck_package("questionary",
|
92
|
+
"'questionary' is required to run this function. Have you installed `fabricatio[qa]`?.")
|
71
93
|
def confirm_to_execute[**P, R](func: Callable[P, R]) -> Callable[P, Optional[R]] | Callable[P, R]:
|
72
94
|
"""Decorator to confirm before executing a function.
|
73
95
|
|
@@ -80,14 +102,15 @@ def confirm_to_execute[**P, R](func: Callable[P, R]) -> Callable[P, Optional[R]]
|
|
80
102
|
if not configs.general.confirm_on_ops:
|
81
103
|
# Skip confirmation if the configuration is set to False
|
82
104
|
return func
|
105
|
+
from questionary import confirm
|
83
106
|
|
84
107
|
if iscoroutinefunction(func):
|
85
108
|
|
86
109
|
@wraps(func)
|
87
110
|
async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> Optional[R]:
|
88
111
|
if await confirm(
|
89
|
-
|
90
|
-
|
112
|
+
f"Are you sure to execute function: {func.__name__}{signature(func)} \n📦 Args:{args}\n🔑 Kwargs:{kwargs}\n",
|
113
|
+
instruction="Please input [Yes/No] to proceed (default: Yes):",
|
91
114
|
).ask_async():
|
92
115
|
return await func(*args, **kwargs)
|
93
116
|
logger.warning(f"Function: {func.__name__}{signature(func)} canceled by user.")
|
@@ -98,8 +121,8 @@ def confirm_to_execute[**P, R](func: Callable[P, R]) -> Callable[P, Optional[R]]
|
|
98
121
|
@wraps(func)
|
99
122
|
def _wrapper(*args: P.args, **kwargs: P.kwargs) -> Optional[R]:
|
100
123
|
if confirm(
|
101
|
-
|
102
|
-
|
124
|
+
f"Are you sure to execute function: {func.__name__}{signature(func)} \n📦 Args:{args}\n🔑 Kwargs:{kwargs}\n",
|
125
|
+
instruction="Please input [Yes/No] to proceed (default: Yes):",
|
103
126
|
).ask():
|
104
127
|
return func(*args, **kwargs)
|
105
128
|
logger.warning(f"Function: {func.__name__}{signature(func)} canceled by user.")
|
@@ -192,7 +215,6 @@ def logging_exec_time[**P, R](func: Callable[P, R]) -> Callable[P, R]:
|
|
192
215
|
from time import time
|
193
216
|
|
194
217
|
if iscoroutinefunction(func):
|
195
|
-
|
196
218
|
@wraps(func)
|
197
219
|
async def _async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
|
198
220
|
start_time = time()
|
@@ -210,25 +232,3 @@ def logging_exec_time[**P, R](func: Callable[P, R]) -> Callable[P, R]:
|
|
210
232
|
return result
|
211
233
|
|
212
234
|
return _wrapper
|
213
|
-
|
214
|
-
|
215
|
-
def precheck_package[**P, R](package_name: str, msg: str) -> Callable[[Callable[P, R]], Callable[P, R]]:
|
216
|
-
"""Check if a package exists in the current environment.
|
217
|
-
|
218
|
-
Args:
|
219
|
-
package_name (str): The name of the package to check.
|
220
|
-
msg (str): The message to display if the package is not found.
|
221
|
-
|
222
|
-
Returns:
|
223
|
-
bool: True if the package exists, False otherwise.
|
224
|
-
"""
|
225
|
-
|
226
|
-
def _wrapper(func: Callable[P, R]) -> Callable[P, R]:
|
227
|
-
def _inner(*args: P.args, **kwargs: P.kwargs) -> R:
|
228
|
-
if find_spec(package_name):
|
229
|
-
return func(*args, **kwargs)
|
230
|
-
raise RuntimeError(msg)
|
231
|
-
|
232
|
-
return _inner
|
233
|
-
|
234
|
-
return _wrapper
|
fabricatio/fs/__init__.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
"""FileSystem manipulation module for Fabricatio."""
|
2
|
+
from importlib.util import find_spec
|
2
3
|
|
4
|
+
from fabricatio.config import configs
|
3
5
|
from fabricatio.fs.curd import (
|
4
6
|
absolute_path,
|
5
7
|
copy_file,
|
@@ -11,10 +13,9 @@ from fabricatio.fs.curd import (
|
|
11
13
|
move_file,
|
12
14
|
tree,
|
13
15
|
)
|
14
|
-
from fabricatio.fs.readers import
|
16
|
+
from fabricatio.fs.readers import safe_json_read, safe_text_read
|
15
17
|
|
16
18
|
__all__ = [
|
17
|
-
"MAGIKA",
|
18
19
|
"absolute_path",
|
19
20
|
"copy_file",
|
20
21
|
"create_directory",
|
@@ -27,3 +28,9 @@ __all__ = [
|
|
27
28
|
"safe_text_read",
|
28
29
|
"tree",
|
29
30
|
]
|
31
|
+
|
32
|
+
if find_spec("magika"):
|
33
|
+
from magika import Magika
|
34
|
+
|
35
|
+
MAGIKA = Magika(model_dir=configs.magika.model_dir)
|
36
|
+
__all__ += ["MAGIKA"]
|
fabricatio/fs/readers.py
CHANGED
@@ -1,17 +1,13 @@
|
|
1
1
|
"""Filesystem readers for Fabricatio."""
|
2
2
|
|
3
|
+
import re
|
3
4
|
from pathlib import Path
|
4
5
|
from typing import Dict, List, Tuple
|
5
6
|
|
6
|
-
import
|
7
|
-
import regex
|
8
|
-
from magika import Magika
|
7
|
+
import ujson
|
9
8
|
|
10
|
-
from fabricatio.config import configs
|
11
9
|
from fabricatio.journal import logger
|
12
10
|
|
13
|
-
MAGIKA = Magika(model_dir=configs.magika.model_dir)
|
14
|
-
|
15
11
|
|
16
12
|
def safe_text_read(path: Path | str) -> str:
|
17
13
|
"""Safely read the text from a file.
|
@@ -41,8 +37,8 @@ def safe_json_read(path: Path | str) -> Dict:
|
|
41
37
|
"""
|
42
38
|
path = Path(path)
|
43
39
|
try:
|
44
|
-
return
|
45
|
-
except (
|
40
|
+
return ujson.loads(path.read_text(encoding="utf-8"))
|
41
|
+
except (ujson.JSONDecodeError, IsADirectoryError, FileNotFoundError) as e:
|
46
42
|
logger.error(f"Failed to read file {path}: {e!s}")
|
47
43
|
return {}
|
48
44
|
|
@@ -58,8 +54,8 @@ def extract_sections(string: str, level: int, section_char: str = "#") -> List[T
|
|
58
54
|
Returns:
|
59
55
|
List[Tuple[str, str]]: List of (header_text, section_content) tuples
|
60
56
|
"""
|
61
|
-
return
|
57
|
+
return re.findall(
|
62
58
|
r"^%s{%d}\s+(.+?)\n((?:(?!^%s{%d}\s).|\n)*)" % (section_char, level, section_char, level),
|
63
59
|
string,
|
64
|
-
|
60
|
+
re.MULTILINE,
|
65
61
|
)
|
@@ -1,10 +1,9 @@
|
|
1
1
|
"""A module containing kwargs types for content correction and checking operations."""
|
2
2
|
|
3
3
|
from importlib.util import find_spec
|
4
|
-
from typing import
|
4
|
+
from typing import NotRequired, TypedDict
|
5
5
|
|
6
6
|
from fabricatio.models.extra.problem import Improvement
|
7
|
-
from fabricatio.models.extra.rag import MilvusDataBase
|
8
7
|
from fabricatio.models.extra.rule import RuleSet
|
9
8
|
from fabricatio.models.generic import SketchedAble
|
10
9
|
from fabricatio.models.kwargs_types import ReferencedKwargs
|
@@ -49,19 +48,13 @@ if find_spec("pymilvus"):
|
|
49
48
|
schema: CollectionSchema | None
|
50
49
|
index_params: IndexParams | None
|
51
50
|
|
52
|
-
class FetchKwargs
|
51
|
+
class FetchKwargs(TypedDict):
|
53
52
|
"""Arguments for fetching data from vector collections.
|
54
53
|
|
55
54
|
Controls how data is retrieved from vector databases, including filtering
|
56
55
|
and result limiting parameters.
|
57
56
|
"""
|
58
57
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
result_per_query: int
|
63
|
-
|
64
|
-
class RetrievalKwargs(FetchKwargs, total=False):
|
65
|
-
"""Arguments for retrieval operations."""
|
66
|
-
|
67
|
-
final_limit: int
|
58
|
+
collection_name: NotRequired[str | None]
|
59
|
+
similarity_threshold: NotRequired[float]
|
60
|
+
result_per_query: NotRequired[int]
|
@@ -0,0 +1,235 @@
|
|
1
|
+
"""A Module containing the article rag models."""
|
2
|
+
|
3
|
+
import re
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import ClassVar, Dict, List, Optional, Self, Unpack
|
6
|
+
|
7
|
+
from fabricatio.fs import safe_text_read
|
8
|
+
from fabricatio.journal import logger
|
9
|
+
from fabricatio.models.extra.rag import MilvusDataBase
|
10
|
+
from fabricatio.models.generic import AsPrompt
|
11
|
+
from fabricatio.models.kwargs_types import ChunkKwargs
|
12
|
+
from fabricatio.rust import BibManager, is_chinese, split_into_chunks
|
13
|
+
from fabricatio.utils import ok
|
14
|
+
from more_itertools.recipes import flatten, unique
|
15
|
+
from pydantic import Field
|
16
|
+
|
17
|
+
|
18
|
+
class ArticleChunk(MilvusDataBase, AsPrompt):
|
19
|
+
"""The chunk of an article."""
|
20
|
+
|
21
|
+
etc_word: ClassVar[str] = "等"
|
22
|
+
and_word: ClassVar[str] = "与"
|
23
|
+
_cite_number: Optional[int] = None
|
24
|
+
|
25
|
+
head_split: ClassVar[List[str]] = [
|
26
|
+
"引 言",
|
27
|
+
"引言",
|
28
|
+
"绪 论",
|
29
|
+
"绪论",
|
30
|
+
"前言",
|
31
|
+
"INTRODUCTION",
|
32
|
+
"Introduction",
|
33
|
+
]
|
34
|
+
tail_split: ClassVar[List[str]] = [
|
35
|
+
"参 考 文 献",
|
36
|
+
"参 考 文 献",
|
37
|
+
"参考文献",
|
38
|
+
"REFERENCES",
|
39
|
+
"References",
|
40
|
+
"Bibliography",
|
41
|
+
"Reference",
|
42
|
+
]
|
43
|
+
chunk: str
|
44
|
+
"""The segment of the article"""
|
45
|
+
year: int
|
46
|
+
"""The year of the article"""
|
47
|
+
authors: List[str] = Field(default_factory=list)
|
48
|
+
"""The authors of the article"""
|
49
|
+
article_title: str
|
50
|
+
"""The title of the article"""
|
51
|
+
bibtex_cite_key: str
|
52
|
+
"""The bibtex cite key of the article"""
|
53
|
+
|
54
|
+
def _as_prompt_inner(self) -> Dict[str, str]:
|
55
|
+
return {
|
56
|
+
f"[[{ok(self._cite_number, 'You need to update cite number first.')}]] reference `{self.article_title}`": self.chunk
|
57
|
+
}
|
58
|
+
|
59
|
+
@property
|
60
|
+
def cite_number(self) -> int:
|
61
|
+
"""Get the cite number."""
|
62
|
+
return ok(self._cite_number, "cite number not set")
|
63
|
+
|
64
|
+
def _prepare_vectorization_inner(self) -> str:
|
65
|
+
return self.chunk
|
66
|
+
|
67
|
+
@classmethod
|
68
|
+
def from_file[P: str | Path](
|
69
|
+
cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
|
70
|
+
) -> List[Self]:
|
71
|
+
"""Load the article chunks from the file."""
|
72
|
+
if isinstance(path, list):
|
73
|
+
result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
|
74
|
+
logger.debug(f"Number of chunks created from list of files: {len(result)}")
|
75
|
+
return result
|
76
|
+
|
77
|
+
return cls._from_file_inner(path, bib_mgr, **kwargs)
|
78
|
+
|
79
|
+
@classmethod
|
80
|
+
def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
|
81
|
+
path = Path(path)
|
82
|
+
|
83
|
+
title_seg = path.stem.split(" - ").pop()
|
84
|
+
|
85
|
+
key = (
|
86
|
+
bib_mgr.get_cite_key_by_title(title_seg)
|
87
|
+
or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
|
88
|
+
or bib_mgr.get_cite_key_fuzzy(path.stem)
|
89
|
+
)
|
90
|
+
if key is None:
|
91
|
+
logger.warning(f"no cite key found for {path.as_posix()}, skip.")
|
92
|
+
return []
|
93
|
+
authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
|
94
|
+
year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
|
95
|
+
article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
|
96
|
+
|
97
|
+
result = [
|
98
|
+
cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
|
99
|
+
for c in split_into_chunks(cls.purge_numeric_citation(cls.strip(safe_text_read(path))), **kwargs)
|
100
|
+
]
|
101
|
+
|
102
|
+
logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
|
103
|
+
return result
|
104
|
+
|
105
|
+
@classmethod
|
106
|
+
def strip(cls, string: str) -> str:
|
107
|
+
"""Strip the head and tail of the string."""
|
108
|
+
logger.debug(f"String length before strip: {(original := len(string))}")
|
109
|
+
for split in (s for s in cls.head_split if s in string):
|
110
|
+
logger.debug(f"Strip head using {split}")
|
111
|
+
parts = string.split(split)
|
112
|
+
string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
|
113
|
+
break
|
114
|
+
logger.debug(
|
115
|
+
f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
|
116
|
+
)
|
117
|
+
if not d:
|
118
|
+
logger.warning("No decrease at head strip, which is might be abnormal.")
|
119
|
+
for split in (s for s in cls.tail_split if s in string):
|
120
|
+
logger.debug(f"Strip tail using {split}")
|
121
|
+
parts = string.split(split)
|
122
|
+
string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
|
123
|
+
break
|
124
|
+
logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
|
125
|
+
if not d:
|
126
|
+
logger.warning("No decrease at tail strip, which is might be abnormal.")
|
127
|
+
|
128
|
+
return string
|
129
|
+
|
130
|
+
def as_typst_cite(self) -> str:
|
131
|
+
"""As typst cite."""
|
132
|
+
return f"#cite(<{self.bibtex_cite_key}>)"
|
133
|
+
|
134
|
+
@staticmethod
|
135
|
+
def purge_numeric_citation(string: str) -> str:
|
136
|
+
"""Purge numeric citation."""
|
137
|
+
import re
|
138
|
+
|
139
|
+
return re.sub(r"\[[\d\s,\\~–-]+]", "", string) # noqa: RUF001
|
140
|
+
|
141
|
+
@property
|
142
|
+
def auther_firstnames(self) -> List[str]:
|
143
|
+
"""Get the first name of the authors."""
|
144
|
+
ret = []
|
145
|
+
for n in self.authors:
|
146
|
+
if is_chinese(n):
|
147
|
+
ret.append(n[0])
|
148
|
+
else:
|
149
|
+
ret.append(n.split()[-1])
|
150
|
+
return ret
|
151
|
+
|
152
|
+
def as_auther_seq(self) -> str:
|
153
|
+
"""Get the auther sequence."""
|
154
|
+
match len(self.authors):
|
155
|
+
case 0:
|
156
|
+
raise ValueError("No authors found")
|
157
|
+
case 1:
|
158
|
+
return f"({self.auther_firstnames[0]},{self.year}){self.as_typst_cite()}"
|
159
|
+
case 2:
|
160
|
+
return f"({self.auther_firstnames[0]}{self.and_word}{self.auther_firstnames[1]},{self.year}){self.as_typst_cite()}"
|
161
|
+
case 3:
|
162
|
+
return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]},{self.year}){self.as_typst_cite()}"
|
163
|
+
case _:
|
164
|
+
return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]}{self.etc_word},{self.year}){self.as_typst_cite()}"
|
165
|
+
|
166
|
+
def update_cite_number(self, cite_number: int) -> Self:
|
167
|
+
"""Update the cite number."""
|
168
|
+
self._cite_number = cite_number
|
169
|
+
return self
|
170
|
+
|
171
|
+
|
172
|
+
class CitationManager(AsPrompt):
    """Citation manager."""

    article_chunks: List[ArticleChunk] = Field(default_factory=list)
    """Article chunks."""

    pat: str = r"\[\[([\d\s,-]*)]]"
    """Regex pattern to match citations."""
    sep: str = ","
    """Separator for citation numbers."""
    abbr_sep: str = "-"
    """Separator for abbreviated citation numbers."""

    def update_chunks(self, article_chunks: List[ArticleChunk], set_cite_number: bool = True) -> Self:
        """Replace the managed chunks with `article_chunks`.

        Args:
            article_chunks: The chunks to manage from now on.
            set_cite_number: When True, renumber all chunks right after the update.

        Returns:
            Self: This manager.
        """
        # The list is mutated in place, so its identity is preserved.
        self.article_chunks.clear()
        self.article_chunks.extend(article_chunks)
        if set_cite_number:
            self.set_cite_number_all()
        return self

    def set_cite_number_all(self) -> Self:
        """Number every managed chunk by its position, starting at 1."""
        for i, a in enumerate(self.article_chunks, 1):
            a.update_cite_number(i)
        return self

    def _as_prompt_inner(self) -> Dict[str, str]:
        """Generate prompt inner representation."""
        return {"References": "\n".join(r.as_prompt() for r in self.article_chunks)}

    def apply(self, string: str) -> str:
        """Replace every citation marker matched by `pat` with typst cite calls.

        Each match's first group is split into notations, expanded into cite
        numbers, and rendered through `unpack_cite_seq`. The whole matched
        marker is substituted. A marker that contains no notation at all
        (for example ``[[]]``) is left untouched.

        Args:
            string: The text containing citation markers.

        Returns:
            str: A new string with the markers replaced.
        """

        def _replace(match: re.Match[str]) -> str:
            # Skip empty tokens such as the one produced by a trailing separator.
            notations = [n for n in self.convert_to_numeric_notations(match.group(1)) if n]
            if not notations:
                return match.group(0)
            citation_number_seq = list(flatten(self.decode_expr(n) for n in notations))
            dedup = self.deduplicate_citation(citation_number_seq)
            return self.unpack_cite_seq(dedup)

        # Strings are immutable: the substituted text must be returned, and
        # re.sub replaces the full marker rather than only its inner text.
        return re.sub(self.pat, _replace, string)

    def decode_expr(self, string: str) -> List[int]:
        """Decode one notation into cite numbers.

        A notation containing `abbr_sep` is read as an inclusive range
        (``"1-3"`` gives ``[1, 2, 3]``); anything else is a single number.

        Raises:
            ValueError: If a part is not an integer, or the notation does not
                split into exactly two range bounds.
        """
        if self.abbr_sep in string:
            start, end = string.split(self.abbr_sep)
            return list(range(int(start), int(end) + 1))
        return [int(string)]

    def convert_to_numeric_notations(self, string: str) -> List[str]:
        """Split a citation string on `sep` and trim whitespace from each part."""
        return [s.strip() for s in string.split(self.sep)]

    def deduplicate_citation(self, citation_seq: List[int]) -> List[int]:
        """Return the cite numbers from `citation_seq` that belong to a managed chunk, without duplicates."""
        wanted = set(citation_seq)  # O(1) membership instead of scanning the list per chunk
        chunk_seq = [a for a in self.article_chunks if a.cite_number in wanted]
        deduped = unique(chunk_seq, lambda a: a.cite_number)
        return [a.cite_number for a in deduped]

    def unpack_cite_seq(self, citation_seq: List[int]) -> str:
        """Concatenate the typst cite calls of every managed chunk whose number is in `citation_seq`."""
        wanted = set(citation_seq)
        chunk_seq = [a for a in self.article_chunks if a.cite_number in wanted]
        return "".join(a.as_typst_cite() for a in chunk_seq)
|
@@ -1,8 +1,9 @@
|
|
1
1
|
"""ArticleEssence: Semantic fingerprint of academic paper for structured analysis."""
|
2
2
|
|
3
|
-
from typing import List
|
3
|
+
from typing import List
|
4
4
|
|
5
|
-
from fabricatio.models.
|
5
|
+
from fabricatio.models.extra.rag import MilvusDataBase
|
6
|
+
from fabricatio.models.generic import PersistentAble, SketchedAble
|
6
7
|
from pydantic import BaseModel
|
7
8
|
|
8
9
|
|
@@ -54,7 +55,7 @@ class Highlightings(BaseModel):
|
|
54
55
|
"""
|
55
56
|
|
56
57
|
|
57
|
-
class ArticleEssence(
|
58
|
+
class ArticleEssence(SketchedAble, PersistentAble, MilvusDataBase):
|
58
59
|
"""Structured representation of a scientific article's core elements in its original language."""
|
59
60
|
|
60
61
|
language: str
|
@@ -93,7 +94,7 @@ class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
|
|
93
94
|
bibtex_cite_key: str
|
94
95
|
"""Bibtex cite key of the original article."""
|
95
96
|
|
96
|
-
def
|
97
|
-
|
98
|
-
|
99
|
-
|
97
|
+
def _prepare_vectorization_inner(self) -> str:
|
98
|
+
return self.compact()
|
99
|
+
|
100
|
+
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
from typing import Dict, Generator, List, Self, Tuple, override
|
4
4
|
|
5
|
+
from fabricatio.decorators import precheck_package
|
5
6
|
from fabricatio.fs.readers import extract_sections
|
6
7
|
from fabricatio.journal import logger
|
7
8
|
from fabricatio.models.extra.article_base import (
|
@@ -14,7 +15,7 @@ from fabricatio.models.extra.article_outline import (
|
|
14
15
|
ArticleOutline,
|
15
16
|
)
|
16
17
|
from fabricatio.models.generic import Described, PersistentAble, SequencePatch, SketchedAble, WithRef, WordCount
|
17
|
-
from fabricatio.rust import word_count
|
18
|
+
from fabricatio.rust import convert_all_block_tex, convert_all_inline_tex, word_count
|
18
19
|
from pydantic import Field
|
19
20
|
|
20
21
|
PARAGRAPH_SEP = "// - - -"
|
@@ -153,6 +154,26 @@ class Article(
|
|
153
154
|
"Original Article": self.display(),
|
154
155
|
}
|
155
156
|
|
157
|
+
def convert_tex(self) -> Self:
    """Run the inline and then the block tex converters over every paragraph.

    Each paragraph's `content` is rewritten in place, first with
    `convert_all_inline_tex` and then with `convert_all_block_tex`.

    Returns:
        Self: This article, to allow chaining.
    """
    # Lazily walk every paragraph of every subsection.
    paragraphs = (para for _, _, subsec in self.iter_subsections() for para in subsec.paragraphs)
    for para in paragraphs:
        para.content = convert_all_inline_tex(para.content)
        para.content = convert_all_block_tex(para.content)
    return self
|
164
|
+
|
165
|
+
def fix_wrapper(self) -> Self:
|
166
|
+
"""Fix wrapper."""
|
167
|
+
for _, _, subsec in self.iter_subsections():
|
168
|
+
for p in subsec.paragraphs:
|
169
|
+
p.content = (
|
170
|
+
p.content.replace(r" \( ", "$")
|
171
|
+
.replace(r" \) ", "$")
|
172
|
+
.replace("\\[\n", "$$\n")
|
173
|
+
.replace("\n\\]", "\n$$")
|
174
|
+
)
|
175
|
+
return self
|
176
|
+
|
156
177
|
@override
|
157
178
|
def iter_subsections(self) -> Generator[Tuple[ArticleChapter, ArticleSection, ArticleSubsection], None, None]:
|
158
179
|
return super().iter_subsections() # pyright: ignore [reportReturnType]
|
@@ -204,3 +225,20 @@ class Article(
|
|
204
225
|
expected_word_count=word_count(body),
|
205
226
|
abstract="",
|
206
227
|
)
|
228
|
+
|
229
|
+
@classmethod
def from_mixed_source(cls, article_outline: ArticleOutline, typst_code: str) -> Self:
    """Build an article from Typst code and enrich it with data from an outline.

    The article is parsed from `typst_code` under the outline's title. It then
    takes the outline's expected word count and description, and each of its
    nodes receives the metadata of the outline node at the same traversal
    position.

    Args:
        article_outline: The outline supplying title, metadata and reference.
        typst_code: The Typst source to parse.

    Returns:
        Self: The result of `update_ref(article_outline)` on the new article.

    Raises:
        ValueError: If the article and the outline yield a different number of
            nodes (the pairing uses ``zip(..., strict=True)``).
    """
    article = cls.from_typst_code(article_outline.title, typst_code)
    article.expected_word_count = article_outline.expected_word_count
    article.description = article_outline.description

    paired_nodes = zip(article.iter_dfs(), article_outline.iter_dfs(), strict=True)
    for node, outline_node in paired_nodes:
        node.update_metadata(outline_node)

    return article.update_ref(article_outline)
|
238
|
+
|
239
|
+
@precheck_package(
    "questionary", "'questionary' is required to run this function. Have you installed `fabricatio[qa]`?."
)
def edit_titles(self) -> Self:
    """Walk every node of the article; title editing itself is not implemented yet.

    Returns:
        Self: This article, matching the declared return type so calls can be chained.

    Raises:
        RuntimeError: From the `precheck_package` guard when `questionary` is not installed.
    """
    # TODO: the loop body is still a placeholder - no title is changed.
    for _ in self.iter_dfs():
        pass
    # Previously the method fell off the end and returned None despite `-> Self`.
    return self
|
@@ -3,12 +3,12 @@
|
|
3
3
|
from itertools import chain
|
4
4
|
from typing import Any, List, Optional, Self, Tuple, Unpack
|
5
5
|
|
6
|
+
from pydantic import Field
|
7
|
+
from rich import print as r_print
|
8
|
+
|
6
9
|
from fabricatio.journal import logger
|
7
10
|
from fabricatio.models.generic import SketchedAble, WithBriefing
|
8
11
|
from fabricatio.utils import ask_edit
|
9
|
-
from pydantic import Field
|
10
|
-
from questionary import Choice, checkbox, text
|
11
|
-
from rich import print as r_print
|
12
12
|
|
13
13
|
|
14
14
|
class Problem(SketchedAble, WithBriefing):
|
@@ -74,6 +74,8 @@ class ProblemSolutions(SketchedAble):
|
|
74
74
|
return len(self.solutions) > 0
|
75
75
|
|
76
76
|
async def edit_problem(self) -> Self:
|
77
|
+
from questionary import text
|
78
|
+
|
77
79
|
"""Interactively edit the problem description."""
|
78
80
|
self.problem = Problem.model_validate_strings(
|
79
81
|
await text("Please edit the problem below:", default=self.problem.display()).ask_async()
|
@@ -127,6 +129,8 @@ class Improvement(SketchedAble):
|
|
127
129
|
Returns:
|
128
130
|
Self: The current instance with filtered problems and solutions.
|
129
131
|
"""
|
132
|
+
from questionary import Choice, checkbox
|
133
|
+
|
130
134
|
# Choose the problems to retain
|
131
135
|
chosen_ones: List[ProblemSolutions] = await checkbox(
|
132
136
|
"Please choose the problems you want to retain.(Default: retain all)",
|