fabricatio 0.2.9.dev3__cp312-cp312-win_amd64.whl → 0.2.10__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/actions/article.py +24 -114
- fabricatio/actions/article_rag.py +156 -18
- fabricatio/actions/fs.py +25 -0
- fabricatio/actions/output.py +17 -3
- fabricatio/actions/rag.py +40 -18
- fabricatio/actions/rules.py +14 -3
- fabricatio/capabilities/check.py +15 -9
- fabricatio/capabilities/correct.py +5 -6
- fabricatio/capabilities/rag.py +41 -231
- fabricatio/capabilities/rating.py +46 -40
- fabricatio/config.py +6 -4
- fabricatio/constants.py +20 -0
- fabricatio/decorators.py +23 -0
- fabricatio/fs/readers.py +20 -1
- fabricatio/models/adv_kwargs_types.py +35 -0
- fabricatio/models/events.py +6 -6
- fabricatio/models/extra/advanced_judge.py +4 -4
- fabricatio/models/extra/aricle_rag.py +170 -0
- fabricatio/models/extra/article_base.py +25 -211
- fabricatio/models/extra/article_essence.py +8 -7
- fabricatio/models/extra/article_main.py +98 -97
- fabricatio/models/extra/article_proposal.py +15 -14
- fabricatio/models/extra/patches.py +6 -6
- fabricatio/models/extra/problem.py +12 -17
- fabricatio/models/extra/rag.py +98 -0
- fabricatio/models/extra/rule.py +1 -2
- fabricatio/models/generic.py +53 -13
- fabricatio/models/kwargs_types.py +8 -36
- fabricatio/models/task.py +3 -3
- fabricatio/models/usages.py +85 -9
- fabricatio/parser.py +5 -5
- fabricatio/rust.cp312-win_amd64.pyd +0 -0
- fabricatio/rust.pyi +137 -10
- fabricatio/utils.py +62 -4
- fabricatio-0.2.10.data/scripts/tdown.exe +0 -0
- {fabricatio-0.2.9.dev3.dist-info → fabricatio-0.2.10.dist-info}/METADATA +1 -4
- fabricatio-0.2.10.dist-info/RECORD +64 -0
- fabricatio/models/utils.py +0 -148
- fabricatio-0.2.9.dev3.data/scripts/tdown.exe +0 -0
- fabricatio-0.2.9.dev3.dist-info/RECORD +0 -61
- {fabricatio-0.2.9.dev3.dist-info → fabricatio-0.2.10.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.9.dev3.dist-info → fabricatio-0.2.10.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,8 @@
|
|
1
1
|
"""A module containing kwargs types for content correction and checking operations."""
|
2
|
+
|
3
|
+
from importlib.util import find_spec
|
4
|
+
from typing import NotRequired, TypedDict
|
5
|
+
|
2
6
|
from fabricatio.models.extra.problem import Improvement
|
3
7
|
from fabricatio.models.extra.rule import RuleSet
|
4
8
|
from fabricatio.models.generic import SketchedAble
|
@@ -23,3 +27,34 @@ class CheckKwargs(ReferencedKwargs[Improvement], total=False):
|
|
23
27
|
"""
|
24
28
|
|
25
29
|
ruleset: RuleSet
|
30
|
+
|
31
|
+
|
32
|
+
# The vector-store kwargs types are only defined when `pymilvus` is importable,
# because their annotations reference pymilvus classes.
if find_spec("pymilvus"):
    from pymilvus import CollectionSchema
    from pymilvus.milvus_client import IndexParams

    class CollectionConfigKwargs(TypedDict, total=False):
        """Configuration parameters for a vector collection.

        These arguments are typically used when configuring connections to vector databases.
        """

        # `total=False`: every key below is optional.
        # NOTE(review): the per-key meanings are inferred from the key names only;
        # presumably they are forwarded to the Milvus client when a collection is
        # created - confirm against the call site.
        dimension: int | None
        primary_field_name: str
        id_type: str
        vector_field_name: str
        metric_type: str
        timeout: float | None
        schema: CollectionSchema | None
        index_params: IndexParams | None

    class FetchKwargs(TypedDict):
        """Arguments for fetching data from vector collections.

        Controls how data is retrieved from vector databases, including filtering
        and result limiting parameters.
        """

        # Each key is individually marked `NotRequired`, so an empty dict is a valid value.
        collection_name: NotRequired[str | None]
        similarity_threshold: NotRequired[float]
        result_per_query: NotRequired[int]
|
fabricatio/models/events.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
from typing import List, Self, Union
|
4
4
|
|
5
5
|
from fabricatio.config import configs
|
6
|
-
from fabricatio.
|
6
|
+
from fabricatio.constants import TaskStatus
|
7
7
|
from pydantic import BaseModel, ConfigDict, Field
|
8
8
|
|
9
9
|
type EventLike = Union[str, List[str], "Event"]
|
@@ -77,23 +77,23 @@ class Event(BaseModel):
|
|
77
77
|
|
78
78
|
def push_pending(self) -> Self:
|
79
79
|
"""Push a pending segment to the event."""
|
80
|
-
return self.push(TaskStatus.Pending
|
80
|
+
return self.push(TaskStatus.Pending)
|
81
81
|
|
82
82
|
def push_running(self) -> Self:
|
83
83
|
"""Push a running segment to the event."""
|
84
|
-
return self.push(TaskStatus.Running
|
84
|
+
return self.push(TaskStatus.Running)
|
85
85
|
|
86
86
|
def push_finished(self) -> Self:
|
87
87
|
"""Push a finished segment to the event."""
|
88
|
-
return self.push(TaskStatus.Finished
|
88
|
+
return self.push(TaskStatus.Finished)
|
89
89
|
|
90
90
|
def push_failed(self) -> Self:
|
91
91
|
"""Push a failed segment to the event."""
|
92
|
-
return self.push(TaskStatus.Failed
|
92
|
+
return self.push(TaskStatus.Failed)
|
93
93
|
|
94
94
|
def push_cancelled(self) -> Self:
|
95
95
|
"""Push a cancelled segment to the event."""
|
96
|
-
return self.push(TaskStatus.Cancelled
|
96
|
+
return self.push(TaskStatus.Cancelled)
|
97
97
|
|
98
98
|
def pop(self) -> str:
|
99
99
|
"""Pop a segment from the event."""
|
@@ -2,17 +2,17 @@
|
|
2
2
|
|
3
3
|
from typing import List
|
4
4
|
|
5
|
-
from fabricatio.models.generic import
|
5
|
+
from fabricatio.models.generic import SketchedAble
|
6
6
|
|
7
7
|
|
8
|
-
class JudgeMent(
|
8
|
+
class JudgeMent(SketchedAble):
|
9
9
|
"""Represents a judgment result containing supporting/denying evidence and final verdict.
|
10
10
|
|
11
11
|
The class stores both affirmative and denies evidence, truth and reasons lists along with the final boolean judgment.
|
12
12
|
"""
|
13
13
|
|
14
14
|
issue_to_judge: str
|
15
|
-
"""The issue to be judged"""
|
15
|
+
"""The issue to be judged, including the original question and context"""
|
16
16
|
|
17
17
|
deny_evidence: List[str]
|
18
18
|
"""List of clues supporting the denial."""
|
@@ -21,7 +21,7 @@ class JudgeMent(ProposedAble, Display):
|
|
21
21
|
"""List of clues supporting the affirmation."""
|
22
22
|
|
23
23
|
final_judgement: bool
|
24
|
-
"""The final judgment made according to all extracted clues."""
|
24
|
+
"""The final judgment made according to all extracted clues. true for the `issue_to_judge` is correct and false for incorrect."""
|
25
25
|
|
26
26
|
def __bool__(self) -> bool:
|
27
27
|
"""Return the final judgment value.
|
@@ -0,0 +1,170 @@
|
|
1
|
+
"""A Module containing the article rag models."""
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import ClassVar, Dict, List, Optional, Self, Unpack
|
5
|
+
|
6
|
+
from fabricatio.rust import BibManager, split_into_chunks, is_chinese
|
7
|
+
from more_itertools.recipes import flatten
|
8
|
+
from pydantic import Field
|
9
|
+
|
10
|
+
from fabricatio.fs import safe_text_read
|
11
|
+
from fabricatio.journal import logger
|
12
|
+
from fabricatio.models.extra.article_main import ArticleSubsection
|
13
|
+
from fabricatio.models.extra.rag import MilvusDataBase
|
14
|
+
from fabricatio.models.generic import AsPrompt
|
15
|
+
from fabricatio.models.kwargs_types import ChunkKwargs
|
16
|
+
from fabricatio.utils import ok, wrapp_in_block
|
17
|
+
|
18
|
+
|
19
|
+
class ArticleChunk(MilvusDataBase, AsPrompt):
|
20
|
+
"""The chunk of an article."""
|
21
|
+
|
22
|
+
etc_word: ClassVar[str] = "等"
|
23
|
+
and_word: ClassVar[str] = "与"
|
24
|
+
_cite_number: Optional[int] = None
|
25
|
+
|
26
|
+
head_split: ClassVar[List[str]] = [
|
27
|
+
"引 言",
|
28
|
+
"引言",
|
29
|
+
"绪 论",
|
30
|
+
"绪论",
|
31
|
+
"前言",
|
32
|
+
"INTRODUCTION",
|
33
|
+
"Introduction",
|
34
|
+
]
|
35
|
+
tail_split: ClassVar[List[str]] = [
|
36
|
+
"参 考 文 献",
|
37
|
+
"参 考 文 献",
|
38
|
+
"参考文献",
|
39
|
+
"REFERENCES",
|
40
|
+
"References",
|
41
|
+
"Bibliography",
|
42
|
+
"Reference",
|
43
|
+
]
|
44
|
+
chunk: str
|
45
|
+
"""The segment of the article"""
|
46
|
+
year: int
|
47
|
+
"""The year of the article"""
|
48
|
+
authors: List[str] = Field(default_factory=list)
|
49
|
+
"""The authors of the article"""
|
50
|
+
article_title: str
|
51
|
+
"""The title of the article"""
|
52
|
+
bibtex_cite_key: str
|
53
|
+
"""The bibtex cite key of the article"""
|
54
|
+
|
55
|
+
def _as_prompt_inner(self) -> Dict[str, str]:
|
56
|
+
return {
|
57
|
+
f"{ok(self._cite_number, 'You need to update cite number first.')}th reference `{self.article_title}`": f"{wrapp_in_block(self.chunk, 'Referring Content')}\n"
|
58
|
+
f"Authors: {';'.join(self.authors)}\n"
|
59
|
+
f"Published Year: {self.year}\n"
|
60
|
+
}
|
61
|
+
|
62
|
+
def _prepare_vectorization_inner(self) -> str:
|
63
|
+
return self.chunk
|
64
|
+
|
65
|
+
@classmethod
|
66
|
+
def from_file[P: str | Path](
|
67
|
+
cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
|
68
|
+
) -> List[Self]:
|
69
|
+
"""Load the article chunks from the file."""
|
70
|
+
if isinstance(path, list):
|
71
|
+
result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
|
72
|
+
logger.debug(f"Number of chunks created from list of files: {len(result)}")
|
73
|
+
return result
|
74
|
+
|
75
|
+
return cls._from_file_inner(path, bib_mgr, **kwargs)
|
76
|
+
|
77
|
+
@classmethod
|
78
|
+
def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
|
79
|
+
path = Path(path)
|
80
|
+
|
81
|
+
title_seg = path.stem.split(" - ").pop()
|
82
|
+
|
83
|
+
key = (
|
84
|
+
bib_mgr.get_cite_key_by_title(title_seg)
|
85
|
+
or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
|
86
|
+
or bib_mgr.get_cite_key_fuzzy(path.stem)
|
87
|
+
)
|
88
|
+
if key is None:
|
89
|
+
logger.warning(f"no cite key found for {path.as_posix()}, skip.")
|
90
|
+
return []
|
91
|
+
authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
|
92
|
+
year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
|
93
|
+
article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
|
94
|
+
|
95
|
+
result = [
|
96
|
+
cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
|
97
|
+
for c in split_into_chunks(cls.strip(safe_text_read(path)), **kwargs)
|
98
|
+
]
|
99
|
+
logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
|
100
|
+
return result
|
101
|
+
|
102
|
+
@classmethod
|
103
|
+
def strip(cls, string: str) -> str:
|
104
|
+
"""Strip the head and tail of the string."""
|
105
|
+
logger.debug(f"String length before strip: {(original := len(string))}")
|
106
|
+
for split in (s for s in cls.head_split if s in string):
|
107
|
+
logger.debug(f"Strip head using {split}")
|
108
|
+
parts = string.split(split)
|
109
|
+
string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
|
110
|
+
break
|
111
|
+
logger.debug(
|
112
|
+
f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
|
113
|
+
)
|
114
|
+
if not d:
|
115
|
+
logger.warning("No decrease at head strip, which is might be abnormal.")
|
116
|
+
for split in (s for s in cls.tail_split if s in string):
|
117
|
+
logger.debug(f"Strip tail using {split}")
|
118
|
+
parts = string.split(split)
|
119
|
+
string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
|
120
|
+
break
|
121
|
+
logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
|
122
|
+
if not d:
|
123
|
+
logger.warning("No decrease at tail strip, which is might be abnormal.")
|
124
|
+
|
125
|
+
return string
|
126
|
+
|
127
|
+
def as_typst_cite(self) -> str:
|
128
|
+
"""As typst cite."""
|
129
|
+
return f"#cite(<{self.bibtex_cite_key}>)"
|
130
|
+
|
131
|
+
@property
|
132
|
+
def auther_firstnames(self) -> List[str]:
|
133
|
+
"""Get the first name of the authors."""
|
134
|
+
ret = []
|
135
|
+
for n in self.authors:
|
136
|
+
if is_chinese(n):
|
137
|
+
ret.append(n[0])
|
138
|
+
else:
|
139
|
+
ret.append(n.split()[-1])
|
140
|
+
return ret
|
141
|
+
|
142
|
+
def as_auther_seq(self) -> str:
|
143
|
+
"""Get the auther sequence."""
|
144
|
+
match len(self.authors):
|
145
|
+
case 0:
|
146
|
+
raise ValueError("No authors found")
|
147
|
+
case 1:
|
148
|
+
return f"({self.auther_firstnames[0]},{self.year}){self.as_typst_cite()}"
|
149
|
+
case 2:
|
150
|
+
return f"({self.auther_firstnames[0]}{self.and_word}{self.auther_firstnames[1]},{self.year}){self.as_typst_cite()}"
|
151
|
+
case 3:
|
152
|
+
return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]},{self.year}){self.as_typst_cite()}"
|
153
|
+
case _:
|
154
|
+
return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]}{self.etc_word},{self.year}){self.as_typst_cite()}"
|
155
|
+
|
156
|
+
def update_cite_number(self, cite_number: int) -> Self:
|
157
|
+
"""Update the cite number."""
|
158
|
+
self._cite_number = cite_number
|
159
|
+
return self
|
160
|
+
|
161
|
+
def replace_cite(self, string: str, left_char: str = "[[", right_char: str = "]]") -> str:
|
162
|
+
"""Replace the cite number in the string."""
|
163
|
+
return string.replace(f"{left_char}{ok(self._cite_number)}{right_char}", self.as_auther_seq())
|
164
|
+
|
165
|
+
def apply(self, article_subsection: ArticleSubsection) -> ArticleSubsection:
|
166
|
+
"""Apply the patch to the article subsection."""
|
167
|
+
for p in article_subsection.paragraphs:
|
168
|
+
p.content = self.replace_cite(p.content)
|
169
|
+
|
170
|
+
return article_subsection
|
@@ -2,8 +2,7 @@
|
|
2
2
|
|
3
3
|
from abc import ABC, abstractmethod
|
4
4
|
from enum import StrEnum
|
5
|
-
from
|
6
|
-
from typing import Generator, List, Optional, Self, Tuple, overload
|
5
|
+
from typing import Generator, List, Optional, Self, Tuple
|
7
6
|
|
8
7
|
from fabricatio.models.generic import (
|
9
8
|
AsPrompt,
|
@@ -15,9 +14,11 @@ from fabricatio.models.generic import (
|
|
15
14
|
PersistentAble,
|
16
15
|
ProposedUpdateAble,
|
17
16
|
ResolveUpdateConflict,
|
18
|
-
SequencePatch,
|
19
17
|
SketchedAble,
|
18
|
+
Titled,
|
19
|
+
WordCount,
|
20
20
|
)
|
21
|
+
from pydantic import Field
|
21
22
|
|
22
23
|
|
23
24
|
class ReferringType(StrEnum):
|
@@ -28,102 +29,25 @@ class ReferringType(StrEnum):
|
|
28
29
|
SUBSECTION = "subsection"
|
29
30
|
|
30
31
|
|
31
|
-
type RefKey = Tuple[str, Optional[str], Optional[str]]
|
32
|
-
|
33
|
-
|
34
|
-
class ArticleRef(ProposedUpdateAble):
|
35
|
-
"""Reference to a specific chapter, section or subsection within the article. You SHALL not refer to an article component that is external and not present within our own article.
|
36
|
-
|
37
|
-
Examples:
|
38
|
-
- Referring to a chapter titled `Introduction`:
|
39
|
-
Using Python
|
40
|
-
```python
|
41
|
-
ArticleRef(referred_chapter_title="Introduction")
|
42
|
-
```
|
43
|
-
Using JSON
|
44
|
-
```json
|
45
|
-
{referred_chapter_title="Introduction"}
|
46
|
-
```
|
47
|
-
- Referring to a section titled `Background` under the `Introduction` chapter:
|
48
|
-
Using Python
|
49
|
-
```python
|
50
|
-
ArticleRef(referred_chapter_title="Introduction", referred_section_title="Background")
|
51
|
-
```
|
52
|
-
Using JSON
|
53
|
-
```json
|
54
|
-
{referred_chapter_title="Introduction", referred_section_title="Background"}
|
55
|
-
```
|
56
|
-
- Referring to a subsection titled `Related Work` under the `Background` section of the `Introduction` chapter:
|
57
|
-
Using Python
|
58
|
-
```python
|
59
|
-
ArticleRef(referred_chapter_title="Introduction", referred_section_title="Background", referred_subsection_title="Related Work")
|
60
|
-
```
|
61
|
-
Using JSON
|
62
|
-
```json
|
63
|
-
{referred_chapter_title="Introduction", referred_section_title="Background", referred_subsection_title="Related Work"}
|
64
|
-
```
|
65
|
-
"""
|
66
|
-
|
67
|
-
referred_chapter_title: str
|
68
|
-
"""`title` Field of the referenced chapter"""
|
69
|
-
referred_section_title: Optional[str] = None
|
70
|
-
"""`title` Field of the referenced section."""
|
71
|
-
referred_subsection_title: Optional[str] = None
|
72
|
-
"""`title` Field of the referenced subsection."""
|
73
|
-
|
74
|
-
def update_from_inner(self, other: Self) -> Self:
|
75
|
-
"""Updates the current instance with the attributes of another instance."""
|
76
|
-
self.referred_chapter_title = other.referred_chapter_title
|
77
|
-
self.referred_section_title = other.referred_section_title
|
78
|
-
self.referred_subsection_title = other.referred_subsection_title
|
79
|
-
return self
|
80
|
-
|
81
|
-
def deref(self, article: "ArticleBase") -> Optional["ArticleOutlineBase"]:
|
82
|
-
"""Dereference the reference to the actual section or subsection within the provided article.
|
83
|
-
|
84
|
-
Args:
|
85
|
-
article (ArticleOutline | Article): The article to dereference the reference from.
|
86
32
|
|
87
|
-
|
88
|
-
ArticleMainBase | ArticleOutline | None: The dereferenced section or subsection, or None if not found.
|
89
|
-
"""
|
90
|
-
chap = next((chap for chap in article.chapters if chap.title == self.referred_chapter_title), None)
|
91
|
-
if self.referred_section_title is None or chap is None:
|
92
|
-
return chap
|
93
|
-
sec = next((sec for sec in chap.sections if sec.title == self.referred_section_title), None)
|
94
|
-
if self.referred_subsection_title is None or sec is None:
|
95
|
-
return sec
|
96
|
-
return next((subsec for subsec in sec.subsections if subsec.title == self.referred_subsection_title), None)
|
33
|
+
type RefKey = Tuple[str, Optional[str], Optional[str]]
|
97
34
|
|
98
|
-
@property
|
99
|
-
def referring_type(self) -> ReferringType:
|
100
|
-
"""Determine the type of reference based on the presence of specific attributes."""
|
101
|
-
if self.referred_subsection_title is not None:
|
102
|
-
return ReferringType.SUBSECTION
|
103
|
-
if self.referred_section_title is not None:
|
104
|
-
return ReferringType.SECTION
|
105
|
-
return ReferringType.CHAPTER
|
106
35
|
|
107
36
|
|
108
|
-
class ArticleMetaData(SketchedAble, Described, Language):
|
37
|
+
class ArticleMetaData(SketchedAble, Described, WordCount, Titled, Language):
|
109
38
|
"""Metadata for an article component."""
|
110
39
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
40
|
+
description: str = Field(
|
41
|
+
alias="elaboration",
|
42
|
+
description=Described.model_fields["description"].description,
|
43
|
+
)
|
115
44
|
|
116
|
-
|
117
|
-
"""List of writing aims of the research component in academic style."""
|
118
|
-
title: str
|
119
|
-
"""Do not add any prefix or suffix to the title. should not contain special characters."""
|
45
|
+
title: str = Field(alias="heading", description=Titled.model_fields["title"].description)
|
120
46
|
|
121
|
-
|
122
|
-
"""
|
47
|
+
aims: List[str]
|
48
|
+
"""List of writing aims of the research component in academic style."""
|
123
49
|
|
124
50
|
|
125
|
-
class ArticleRefSequencePatch(SequencePatch[ArticleRef]):
|
126
|
-
"""Patch for article refs."""
|
127
51
|
|
128
52
|
|
129
53
|
class ArticleOutlineBase(
|
@@ -143,12 +67,8 @@ class ArticleOutlineBase(
|
|
143
67
|
|
144
68
|
def update_metadata(self, other: ArticleMetaData) -> Self:
|
145
69
|
"""Updates the metadata of the current instance with the attributes of another instance."""
|
146
|
-
self.
|
147
|
-
self.
|
148
|
-
self.depend_on.clear()
|
149
|
-
self.depend_on.extend(other.depend_on)
|
150
|
-
self.writing_aim.clear()
|
151
|
-
self.writing_aim.extend(other.writing_aim)
|
70
|
+
self.aims.clear()
|
71
|
+
self.aims.extend(other.aims)
|
152
72
|
self.description = other.description
|
153
73
|
return self
|
154
74
|
|
@@ -272,22 +192,19 @@ class ChapterBase[T: SectionBase](ArticleOutlineBase):
|
|
272
192
|
return ""
|
273
193
|
|
274
194
|
|
275
|
-
class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, Language, ABC):
|
195
|
+
class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, WordCount, Described, Titled, Language, ABC):
|
276
196
|
"""Base class for article outlines."""
|
277
197
|
|
278
|
-
title: str
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
"""
|
198
|
+
title: str = Field(alias="heading", description=Titled.model_fields["title"].description)
|
199
|
+
description: str = Field(alias="abstract")
|
200
|
+
"""The abstract serves as a concise summary of an academic article, encapsulating its core purpose, methodologies, key results,
|
201
|
+
and conclusions while enabling readers to rapidly assess the relevance and significance of the study.
|
202
|
+
Functioning as the article's distilled essence, it succinctly articulates the research problem, objectives,
|
203
|
+
and scope, providing a roadmap for the full text while also facilitating database indexing, literature reviews,
|
204
|
+
and citation tracking through standardized metadata. Additionally, it acts as an accessibility gateway,
|
205
|
+
allowing scholars to gauge the study's contribution to existing knowledge, its methodological rigor,
|
206
|
+
and its broader implications without engaging with the entire manuscript, thereby optimizing scholarly communication efficiency."""
|
288
207
|
|
289
|
-
abstract: str
|
290
|
-
"""The abstract is a concise summary of the academic paper's main findings."""
|
291
208
|
chapters: List[T]
|
292
209
|
"""Chapters of the article. Contains at least one chapter. You can also add more as needed."""
|
293
210
|
|
@@ -317,34 +234,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, Language, ABC):
|
|
317
234
|
yield sec
|
318
235
|
yield from sec.subsections
|
319
236
|
|
320
|
-
def iter_support_on(self, rev: bool = False) -> Generator[ArticleRef, None, None]:
|
321
|
-
"""Iterates over all references that the article components support.
|
322
|
-
|
323
|
-
Args:
|
324
|
-
rev (bool): If True, iterate in reverse order.
|
325
|
-
|
326
|
-
Yields:
|
327
|
-
ArticleRef: Each reference that the article components support.
|
328
|
-
"""
|
329
|
-
if rev:
|
330
|
-
yield from chain(*[a.support_to for a in self.iter_dfs_rev()])
|
331
|
-
return
|
332
|
-
yield from chain(*[a.support_to for a in self.iter_dfs()])
|
333
|
-
|
334
|
-
def iter_depend_on(self, rev: bool = False) -> Generator[ArticleRef, None, None]:
|
335
|
-
"""Iterates over all references that the article components depend on.
|
336
|
-
|
337
|
-
Args:
|
338
|
-
rev (bool): If True, iterate in reverse order.
|
339
|
-
|
340
|
-
Yields:
|
341
|
-
ArticleRef: Each reference that the article components depend on.
|
342
|
-
"""
|
343
|
-
if rev:
|
344
|
-
yield from chain(*[a.depend_on for a in self.iter_dfs_rev()])
|
345
|
-
return
|
346
|
-
yield from chain(*[a.depend_on for a in self.iter_dfs()])
|
347
|
-
|
348
237
|
def iter_sections(self) -> Generator[Tuple[ChapterBase, SectionBase], None, None]:
|
349
238
|
"""Iterates through all sections in the article.
|
350
239
|
|
@@ -378,12 +267,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, Language, ABC):
|
|
378
267
|
"""Gathers all introspected components in the article structure."""
|
379
268
|
return "\n".join([i for component in self.chapters if (i := component.introspect())])
|
380
269
|
|
381
|
-
@overload
|
382
|
-
def find_illegal_ref(self, gather_identical: bool) -> Optional[Tuple[ArticleRef | List[ArticleRef], str]]: ...
|
383
|
-
|
384
|
-
@overload
|
385
|
-
def find_illegal_ref(self) -> Optional[Tuple[ArticleRef, str]]: ...
|
386
|
-
|
387
270
|
def iter_chap_title(self) -> Generator[str, None, None]:
|
388
271
|
"""Iterates through all chapter titles in the article."""
|
389
272
|
for chap in self.chapters:
|
@@ -399,75 +282,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, Language, ABC):
|
|
399
282
|
for _, _, subsec in self.iter_subsections():
|
400
283
|
yield subsec.title
|
401
284
|
|
402
|
-
def find_illegal_ref(self, gather_identical: bool = False) -> Optional[Tuple[ArticleRef | List[ArticleRef], str]]:
|
403
|
-
"""Finds the first illegal component in the outline.
|
404
|
-
|
405
|
-
Returns:
|
406
|
-
Tuple[ArticleOutlineBase, str]: A tuple containing the illegal component and an error message.
|
407
|
-
"""
|
408
|
-
summary = ""
|
409
|
-
chap_titles_set = set(self.iter_chap_title())
|
410
|
-
sec_titles_set = set(self.iter_section_title())
|
411
|
-
subsec_titles_set = set(self.iter_subsection_title())
|
412
|
-
|
413
|
-
for component in self.iter_dfs_rev():
|
414
|
-
for ref in chain(component.depend_on, component.support_to):
|
415
|
-
if not ref.deref(self):
|
416
|
-
summary += f"Invalid internal reference in `{component.__class__.__name__}` titled `{component.title}`, because the referred {ref.referring_type} is not exists within the article, see the original obj dump: {ref.model_dump()}\n"
|
417
|
-
|
418
|
-
if ref.referred_chapter_title not in (chap_titles_set):
|
419
|
-
summary += f"Chapter titled `{ref.referred_chapter_title}` is not any of {chap_titles_set}\n"
|
420
|
-
if ref.referred_section_title and ref.referred_section_title not in (sec_titles_set):
|
421
|
-
summary += f"Section Titled `{ref.referred_section_title}` is not any of {sec_titles_set}\n"
|
422
|
-
if ref.referred_subsection_title and ref.referred_subsection_title not in (subsec_titles_set):
|
423
|
-
summary += (
|
424
|
-
f"Subsection Titled `{ref.referred_subsection_title}` is not any of {subsec_titles_set}"
|
425
|
-
)
|
426
|
-
|
427
|
-
if summary:
|
428
|
-
return (
|
429
|
-
(
|
430
|
-
[
|
431
|
-
identical_ref
|
432
|
-
for identical_ref in chain(self.iter_depend_on(), self.iter_support_on())
|
433
|
-
if identical_ref == ref
|
434
|
-
],
|
435
|
-
summary,
|
436
|
-
)
|
437
|
-
if gather_identical
|
438
|
-
else (ref, summary)
|
439
|
-
)
|
440
|
-
|
441
|
-
return None
|
442
|
-
|
443
|
-
def gather_illegal_ref(self) -> Tuple[List[ArticleRef], str]:
|
444
|
-
"""Gathers all illegal references in the article."""
|
445
|
-
summary = []
|
446
|
-
chap_titles_set = set(self.iter_chap_title())
|
447
|
-
sec_titles_set = set(self.iter_section_title())
|
448
|
-
subsec_titles_set = set(self.iter_subsection_title())
|
449
|
-
res_seq = []
|
450
|
-
|
451
|
-
for component in self.iter_dfs():
|
452
|
-
for ref in (
|
453
|
-
r for r in chain(component.depend_on, component.support_to) if not r.deref(self) and r not in res_seq
|
454
|
-
):
|
455
|
-
res_seq.append(ref)
|
456
|
-
if ref.referred_chapter_title not in chap_titles_set:
|
457
|
-
summary.append(
|
458
|
-
f"Chapter titled `{ref.referred_chapter_title}` is not exist, since it is not any of {chap_titles_set}."
|
459
|
-
)
|
460
|
-
if ref.referred_section_title and (ref.referred_section_title not in sec_titles_set):
|
461
|
-
summary.append(
|
462
|
-
f"Section Titled `{ref.referred_section_title}` is not exist, since it is not any of {sec_titles_set}"
|
463
|
-
)
|
464
|
-
if ref.referred_subsection_title and (ref.referred_subsection_title not in subsec_titles_set):
|
465
|
-
summary.append(
|
466
|
-
f"Subsection Titled `{ref.referred_subsection_title}` is not exist, since it is not any of {subsec_titles_set}"
|
467
|
-
)
|
468
|
-
|
469
|
-
return res_seq, "\n".join(summary)
|
470
|
-
|
471
285
|
def finalized_dump(self) -> str:
|
472
286
|
"""Generates standardized hierarchical markup for academic publishing systems.
|
473
287
|
|
@@ -1,8 +1,9 @@
|
|
1
1
|
"""ArticleEssence: Semantic fingerprint of academic paper for structured analysis."""
|
2
2
|
|
3
|
-
from typing import List
|
3
|
+
from typing import List
|
4
4
|
|
5
|
-
from fabricatio.models.
|
5
|
+
from fabricatio.models.extra.rag import MilvusDataBase
|
6
|
+
from fabricatio.models.generic import PersistentAble, SketchedAble
|
6
7
|
from pydantic import BaseModel
|
7
8
|
|
8
9
|
|
@@ -54,7 +55,7 @@ class Highlightings(BaseModel):
|
|
54
55
|
"""
|
55
56
|
|
56
57
|
|
57
|
-
class ArticleEssence(
|
58
|
+
class ArticleEssence(SketchedAble, PersistentAble, MilvusDataBase):
|
58
59
|
"""Structured representation of a scientific article's core elements in its original language."""
|
59
60
|
|
60
61
|
language: str
|
@@ -93,7 +94,7 @@ class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
|
|
93
94
|
bibtex_cite_key: str
|
94
95
|
"""Bibtex cite key of the original article."""
|
95
96
|
|
96
|
-
def
|
97
|
-
|
98
|
-
|
99
|
-
|
97
|
+
def _prepare_vectorization_inner(self) -> str:
|
98
|
+
return self.compact()
|
99
|
+
|
100
|
+
|