fabricatio 0.2.9.dev3__cp312-cp312-win_amd64.whl → 0.2.10__cp312-cp312-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (42):
  1. fabricatio/actions/article.py +24 -114
  2. fabricatio/actions/article_rag.py +156 -18
  3. fabricatio/actions/fs.py +25 -0
  4. fabricatio/actions/output.py +17 -3
  5. fabricatio/actions/rag.py +40 -18
  6. fabricatio/actions/rules.py +14 -3
  7. fabricatio/capabilities/check.py +15 -9
  8. fabricatio/capabilities/correct.py +5 -6
  9. fabricatio/capabilities/rag.py +41 -231
  10. fabricatio/capabilities/rating.py +46 -40
  11. fabricatio/config.py +6 -4
  12. fabricatio/constants.py +20 -0
  13. fabricatio/decorators.py +23 -0
  14. fabricatio/fs/readers.py +20 -1
  15. fabricatio/models/adv_kwargs_types.py +35 -0
  16. fabricatio/models/events.py +6 -6
  17. fabricatio/models/extra/advanced_judge.py +4 -4
  18. fabricatio/models/extra/aricle_rag.py +170 -0
  19. fabricatio/models/extra/article_base.py +25 -211
  20. fabricatio/models/extra/article_essence.py +8 -7
  21. fabricatio/models/extra/article_main.py +98 -97
  22. fabricatio/models/extra/article_proposal.py +15 -14
  23. fabricatio/models/extra/patches.py +6 -6
  24. fabricatio/models/extra/problem.py +12 -17
  25. fabricatio/models/extra/rag.py +98 -0
  26. fabricatio/models/extra/rule.py +1 -2
  27. fabricatio/models/generic.py +53 -13
  28. fabricatio/models/kwargs_types.py +8 -36
  29. fabricatio/models/task.py +3 -3
  30. fabricatio/models/usages.py +85 -9
  31. fabricatio/parser.py +5 -5
  32. fabricatio/rust.cp312-win_amd64.pyd +0 -0
  33. fabricatio/rust.pyi +137 -10
  34. fabricatio/utils.py +62 -4
  35. fabricatio-0.2.10.data/scripts/tdown.exe +0 -0
  36. {fabricatio-0.2.9.dev3.dist-info → fabricatio-0.2.10.dist-info}/METADATA +1 -4
  37. fabricatio-0.2.10.dist-info/RECORD +64 -0
  38. fabricatio/models/utils.py +0 -148
  39. fabricatio-0.2.9.dev3.data/scripts/tdown.exe +0 -0
  40. fabricatio-0.2.9.dev3.dist-info/RECORD +0 -61
  41. {fabricatio-0.2.9.dev3.dist-info → fabricatio-0.2.10.dist-info}/WHEEL +0 -0
  42. {fabricatio-0.2.9.dev3.dist-info → fabricatio-0.2.10.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,8 @@
1
1
  """A module containing kwargs types for content correction and checking operations."""
2
+
3
+ from importlib.util import find_spec
4
+ from typing import NotRequired, TypedDict
5
+
2
6
  from fabricatio.models.extra.problem import Improvement
3
7
  from fabricatio.models.extra.rule import RuleSet
4
8
  from fabricatio.models.generic import SketchedAble
@@ -23,3 +27,34 @@ class CheckKwargs(ReferencedKwargs[Improvement], total=False):
23
27
  """
24
28
 
25
29
  ruleset: RuleSet
30
+
31
+
32
+ if find_spec("pymilvus"):
33
+ from pymilvus import CollectionSchema
34
+ from pymilvus.milvus_client import IndexParams
35
+
36
+ class CollectionConfigKwargs(TypedDict, total=False):
37
+ """Configuration parameters for a vector collection.
38
+
39
+ These arguments are typically used when configuring connections to vector databases.
40
+ """
41
+
42
+ dimension: int | None
43
+ primary_field_name: str
44
+ id_type: str
45
+ vector_field_name: str
46
+ metric_type: str
47
+ timeout: float | None
48
+ schema: CollectionSchema | None
49
+ index_params: IndexParams | None
50
+
51
+ class FetchKwargs(TypedDict):
52
+ """Arguments for fetching data from vector collections.
53
+
54
+ Controls how data is retrieved from vector databases, including filtering
55
+ and result limiting parameters.
56
+ """
57
+
58
+ collection_name: NotRequired[str | None]
59
+ similarity_threshold: NotRequired[float]
60
+ result_per_query: NotRequired[int]
@@ -3,7 +3,7 @@
3
3
  from typing import List, Self, Union
4
4
 
5
5
  from fabricatio.config import configs
6
- from fabricatio.models.utils import TaskStatus
6
+ from fabricatio.constants import TaskStatus
7
7
  from pydantic import BaseModel, ConfigDict, Field
8
8
 
9
9
  type EventLike = Union[str, List[str], "Event"]
@@ -77,23 +77,23 @@ class Event(BaseModel):
77
77
 
78
78
  def push_pending(self) -> Self:
79
79
  """Push a pending segment to the event."""
80
- return self.push(TaskStatus.Pending.value)
80
+ return self.push(TaskStatus.Pending)
81
81
 
82
82
  def push_running(self) -> Self:
83
83
  """Push a running segment to the event."""
84
- return self.push(TaskStatus.Running.value)
84
+ return self.push(TaskStatus.Running)
85
85
 
86
86
  def push_finished(self) -> Self:
87
87
  """Push a finished segment to the event."""
88
- return self.push(TaskStatus.Finished.value)
88
+ return self.push(TaskStatus.Finished)
89
89
 
90
90
  def push_failed(self) -> Self:
91
91
  """Push a failed segment to the event."""
92
- return self.push(TaskStatus.Failed.value)
92
+ return self.push(TaskStatus.Failed)
93
93
 
94
94
  def push_cancelled(self) -> Self:
95
95
  """Push a cancelled segment to the event."""
96
- return self.push(TaskStatus.Cancelled.value)
96
+ return self.push(TaskStatus.Cancelled)
97
97
 
98
98
  def pop(self) -> str:
99
99
  """Pop a segment from the event."""
@@ -2,17 +2,17 @@
2
2
 
3
3
  from typing import List
4
4
 
5
- from fabricatio.models.generic import Display, ProposedAble
5
+ from fabricatio.models.generic import SketchedAble
6
6
 
7
7
 
8
- class JudgeMent(ProposedAble, Display):
8
+ class JudgeMent(SketchedAble):
9
9
  """Represents a judgment result containing supporting/denying evidence and final verdict.
10
10
 
11
11
  The class stores both affirmative and denies evidence, truth and reasons lists along with the final boolean judgment.
12
12
  """
13
13
 
14
14
  issue_to_judge: str
15
- """The issue to be judged"""
15
+ """The issue to be judged, including the original question and context"""
16
16
 
17
17
  deny_evidence: List[str]
18
18
  """List of clues supporting the denial."""
@@ -21,7 +21,7 @@ class JudgeMent(ProposedAble, Display):
21
21
  """List of clues supporting the affirmation."""
22
22
 
23
23
  final_judgement: bool
24
- """The final judgment made according to all extracted clues."""
24
+ """The final judgment made according to all extracted clues. true for the `issue_to_judge` is correct and false for incorrect."""
25
25
 
26
26
  def __bool__(self) -> bool:
27
27
  """Return the final judgment value.
@@ -0,0 +1,170 @@
1
+ """A Module containing the article rag models."""
2
+
3
+ from pathlib import Path
4
+ from typing import ClassVar, Dict, List, Optional, Self, Unpack
5
+
6
+ from fabricatio.rust import BibManager, split_into_chunks, is_chinese
7
+ from more_itertools.recipes import flatten
8
+ from pydantic import Field
9
+
10
+ from fabricatio.fs import safe_text_read
11
+ from fabricatio.journal import logger
12
+ from fabricatio.models.extra.article_main import ArticleSubsection
13
+ from fabricatio.models.extra.rag import MilvusDataBase
14
+ from fabricatio.models.generic import AsPrompt
15
+ from fabricatio.models.kwargs_types import ChunkKwargs
16
+ from fabricatio.utils import ok, wrapp_in_block
17
+
18
+
19
+ class ArticleChunk(MilvusDataBase, AsPrompt):
20
+ """The chunk of an article."""
21
+
22
+ etc_word: ClassVar[str] = "等"
23
+ and_word: ClassVar[str] = "与"
24
+ _cite_number: Optional[int] = None
25
+
26
+ head_split: ClassVar[List[str]] = [
27
+ "引 言",
28
+ "引言",
29
+ "绪 论",
30
+ "绪论",
31
+ "前言",
32
+ "INTRODUCTION",
33
+ "Introduction",
34
+ ]
35
+ tail_split: ClassVar[List[str]] = [
36
+ "参 考 文 献",
37
+ "参 考 文 献",
38
+ "参考文献",
39
+ "REFERENCES",
40
+ "References",
41
+ "Bibliography",
42
+ "Reference",
43
+ ]
44
+ chunk: str
45
+ """The segment of the article"""
46
+ year: int
47
+ """The year of the article"""
48
+ authors: List[str] = Field(default_factory=list)
49
+ """The authors of the article"""
50
+ article_title: str
51
+ """The title of the article"""
52
+ bibtex_cite_key: str
53
+ """The bibtex cite key of the article"""
54
+
55
+ def _as_prompt_inner(self) -> Dict[str, str]:
56
+ return {
57
+ f"{ok(self._cite_number, 'You need to update cite number first.')}th reference `{self.article_title}`": f"{wrapp_in_block(self.chunk, 'Referring Content')}\n"
58
+ f"Authors: {';'.join(self.authors)}\n"
59
+ f"Published Year: {self.year}\n"
60
+ }
61
+
62
+ def _prepare_vectorization_inner(self) -> str:
63
+ return self.chunk
64
+
65
+ @classmethod
66
+ def from_file[P: str | Path](
67
+ cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
68
+ ) -> List[Self]:
69
+ """Load the article chunks from the file."""
70
+ if isinstance(path, list):
71
+ result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
72
+ logger.debug(f"Number of chunks created from list of files: {len(result)}")
73
+ return result
74
+
75
+ return cls._from_file_inner(path, bib_mgr, **kwargs)
76
+
77
+ @classmethod
78
+ def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
79
+ path = Path(path)
80
+
81
+ title_seg = path.stem.split(" - ").pop()
82
+
83
+ key = (
84
+ bib_mgr.get_cite_key_by_title(title_seg)
85
+ or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
86
+ or bib_mgr.get_cite_key_fuzzy(path.stem)
87
+ )
88
+ if key is None:
89
+ logger.warning(f"no cite key found for {path.as_posix()}, skip.")
90
+ return []
91
+ authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
92
+ year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
93
+ article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
94
+
95
+ result = [
96
+ cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
97
+ for c in split_into_chunks(cls.strip(safe_text_read(path)), **kwargs)
98
+ ]
99
+ logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
100
+ return result
101
+
102
+ @classmethod
103
+ def strip(cls, string: str) -> str:
104
+ """Strip the head and tail of the string."""
105
+ logger.debug(f"String length before strip: {(original := len(string))}")
106
+ for split in (s for s in cls.head_split if s in string):
107
+ logger.debug(f"Strip head using {split}")
108
+ parts = string.split(split)
109
+ string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
110
+ break
111
+ logger.debug(
112
+ f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
113
+ )
114
+ if not d:
115
+ logger.warning("No decrease at head strip, which is might be abnormal.")
116
+ for split in (s for s in cls.tail_split if s in string):
117
+ logger.debug(f"Strip tail using {split}")
118
+ parts = string.split(split)
119
+ string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
120
+ break
121
+ logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
122
+ if not d:
123
+ logger.warning("No decrease at tail strip, which is might be abnormal.")
124
+
125
+ return string
126
+
127
+ def as_typst_cite(self) -> str:
128
+ """As typst cite."""
129
+ return f"#cite(<{self.bibtex_cite_key}>)"
130
+
131
+ @property
132
+ def auther_firstnames(self) -> List[str]:
133
+ """Get the first name of the authors."""
134
+ ret = []
135
+ for n in self.authors:
136
+ if is_chinese(n):
137
+ ret.append(n[0])
138
+ else:
139
+ ret.append(n.split()[-1])
140
+ return ret
141
+
142
+ def as_auther_seq(self) -> str:
143
+ """Get the auther sequence."""
144
+ match len(self.authors):
145
+ case 0:
146
+ raise ValueError("No authors found")
147
+ case 1:
148
+ return f"({self.auther_firstnames[0]},{self.year}){self.as_typst_cite()}"
149
+ case 2:
150
+ return f"({self.auther_firstnames[0]}{self.and_word}{self.auther_firstnames[1]},{self.year}){self.as_typst_cite()}"
151
+ case 3:
152
+ return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]},{self.year}){self.as_typst_cite()}"
153
+ case _:
154
+ return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]}{self.etc_word},{self.year}){self.as_typst_cite()}"
155
+
156
+ def update_cite_number(self, cite_number: int) -> Self:
157
+ """Update the cite number."""
158
+ self._cite_number = cite_number
159
+ return self
160
+
161
+ def replace_cite(self, string: str, left_char: str = "[[", right_char: str = "]]") -> str:
162
+ """Replace the cite number in the string."""
163
+ return string.replace(f"{left_char}{ok(self._cite_number)}{right_char}", self.as_auther_seq())
164
+
165
+ def apply(self, article_subsection: ArticleSubsection) -> ArticleSubsection:
166
+ """Apply the patch to the article subsection."""
167
+ for p in article_subsection.paragraphs:
168
+ p.content = self.replace_cite(p.content)
169
+
170
+ return article_subsection
@@ -2,8 +2,7 @@
2
2
 
3
3
  from abc import ABC, abstractmethod
4
4
  from enum import StrEnum
5
- from itertools import chain
6
- from typing import Generator, List, Optional, Self, Tuple, overload
5
+ from typing import Generator, List, Optional, Self, Tuple
7
6
 
8
7
  from fabricatio.models.generic import (
9
8
  AsPrompt,
@@ -15,9 +14,11 @@ from fabricatio.models.generic import (
15
14
  PersistentAble,
16
15
  ProposedUpdateAble,
17
16
  ResolveUpdateConflict,
18
- SequencePatch,
19
17
  SketchedAble,
18
+ Titled,
19
+ WordCount,
20
20
  )
21
+ from pydantic import Field
21
22
 
22
23
 
23
24
  class ReferringType(StrEnum):
@@ -28,102 +29,25 @@ class ReferringType(StrEnum):
28
29
  SUBSECTION = "subsection"
29
30
 
30
31
 
31
- type RefKey = Tuple[str, Optional[str], Optional[str]]
32
-
33
-
34
- class ArticleRef(ProposedUpdateAble):
35
- """Reference to a specific chapter, section or subsection within the article. You SHALL not refer to an article component that is external and not present within our own article.
36
-
37
- Examples:
38
- - Referring to a chapter titled `Introduction`:
39
- Using Python
40
- ```python
41
- ArticleRef(referred_chapter_title="Introduction")
42
- ```
43
- Using JSON
44
- ```json
45
- {referred_chapter_title="Introduction"}
46
- ```
47
- - Referring to a section titled `Background` under the `Introduction` chapter:
48
- Using Python
49
- ```python
50
- ArticleRef(referred_chapter_title="Introduction", referred_section_title="Background")
51
- ```
52
- Using JSON
53
- ```json
54
- {referred_chapter_title="Introduction", referred_section_title="Background"}
55
- ```
56
- - Referring to a subsection titled `Related Work` under the `Background` section of the `Introduction` chapter:
57
- Using Python
58
- ```python
59
- ArticleRef(referred_chapter_title="Introduction", referred_section_title="Background", referred_subsection_title="Related Work")
60
- ```
61
- Using JSON
62
- ```json
63
- {referred_chapter_title="Introduction", referred_section_title="Background", referred_subsection_title="Related Work"}
64
- ```
65
- """
66
-
67
- referred_chapter_title: str
68
- """`title` Field of the referenced chapter"""
69
- referred_section_title: Optional[str] = None
70
- """`title` Field of the referenced section."""
71
- referred_subsection_title: Optional[str] = None
72
- """`title` Field of the referenced subsection."""
73
-
74
- def update_from_inner(self, other: Self) -> Self:
75
- """Updates the current instance with the attributes of another instance."""
76
- self.referred_chapter_title = other.referred_chapter_title
77
- self.referred_section_title = other.referred_section_title
78
- self.referred_subsection_title = other.referred_subsection_title
79
- return self
80
-
81
- def deref(self, article: "ArticleBase") -> Optional["ArticleOutlineBase"]:
82
- """Dereference the reference to the actual section or subsection within the provided article.
83
-
84
- Args:
85
- article (ArticleOutline | Article): The article to dereference the reference from.
86
32
 
87
- Returns:
88
- ArticleMainBase | ArticleOutline | None: The dereferenced section or subsection, or None if not found.
89
- """
90
- chap = next((chap for chap in article.chapters if chap.title == self.referred_chapter_title), None)
91
- if self.referred_section_title is None or chap is None:
92
- return chap
93
- sec = next((sec for sec in chap.sections if sec.title == self.referred_section_title), None)
94
- if self.referred_subsection_title is None or sec is None:
95
- return sec
96
- return next((subsec for subsec in sec.subsections if subsec.title == self.referred_subsection_title), None)
33
+ type RefKey = Tuple[str, Optional[str], Optional[str]]
97
34
 
98
- @property
99
- def referring_type(self) -> ReferringType:
100
- """Determine the type of reference based on the presence of specific attributes."""
101
- if self.referred_subsection_title is not None:
102
- return ReferringType.SUBSECTION
103
- if self.referred_section_title is not None:
104
- return ReferringType.SECTION
105
- return ReferringType.CHAPTER
106
35
 
107
36
 
108
- class ArticleMetaData(SketchedAble, Described, Language):
37
+ class ArticleMetaData(SketchedAble, Described, WordCount, Titled, Language):
109
38
  """Metadata for an article component."""
110
39
 
111
- support_to: List[ArticleRef]
112
- """List of references to other component of this articles that this component supports."""
113
- depend_on: List[ArticleRef]
114
- """List of references to other component of this articles that this component depends on."""
40
+ description: str = Field(
41
+ alias="elaboration",
42
+ description=Described.model_fields["description"].description,
43
+ )
115
44
 
116
- writing_aim: List[str]
117
- """List of writing aims of the research component in academic style."""
118
- title: str
119
- """Do not add any prefix or suffix to the title. should not contain special characters."""
45
+ title: str = Field(alias="heading", description=Titled.model_fields["title"].description)
120
46
 
121
- expected_word_count: int
122
- """Expected word count of this research component."""
47
+ aims: List[str]
48
+ """List of writing aims of the research component in academic style."""
123
49
 
124
50
 
125
- class ArticleRefSequencePatch(SequencePatch[ArticleRef]):
126
- """Patch for article refs."""
127
51
 
128
52
 
129
53
  class ArticleOutlineBase(
@@ -143,12 +67,8 @@ class ArticleOutlineBase(
143
67
 
144
68
  def update_metadata(self, other: ArticleMetaData) -> Self:
145
69
  """Updates the metadata of the current instance with the attributes of another instance."""
146
- self.support_to.clear()
147
- self.support_to.extend(other.support_to)
148
- self.depend_on.clear()
149
- self.depend_on.extend(other.depend_on)
150
- self.writing_aim.clear()
151
- self.writing_aim.extend(other.writing_aim)
70
+ self.aims.clear()
71
+ self.aims.extend(other.aims)
152
72
  self.description = other.description
153
73
  return self
154
74
 
@@ -272,22 +192,19 @@ class ChapterBase[T: SectionBase](ArticleOutlineBase):
272
192
  return ""
273
193
 
274
194
 
275
- class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, Language, ABC):
195
+ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, WordCount, Described, Titled, Language, ABC):
276
196
  """Base class for article outlines."""
277
197
 
278
- title: str
279
- """Title of the academic paper."""
280
-
281
- prospect: str
282
- """Consolidated research statement with four pillars:
283
- 1. Problem Identification: Current limitations
284
- 2. Methodological Response: Technical approach
285
- 3. Empirical Validation: Evaluation strategy
286
- 4. Scholarly Impact: Field contributions
287
- """
198
+ title: str = Field(alias="heading", description=Titled.model_fields["title"].description)
199
+ description: str = Field(alias="abstract")
200
+ """The abstract serves as a concise summary of an academic article, encapsulating its core purpose, methodologies, key results,
201
+ and conclusions while enabling readers to rapidly assess the relevance and significance of the study.
202
+ Functioning as the article's distilled essence, it succinctly articulates the research problem, objectives,
203
+ and scope, providing a roadmap for the full text while also facilitating database indexing, literature reviews,
204
+ and citation tracking through standardized metadata. Additionally, it acts as an accessibility gateway,
205
+ allowing scholars to gauge the study's contribution to existing knowledge, its methodological rigor,
206
+ and its broader implications without engaging with the entire manuscript, thereby optimizing scholarly communication efficiency."""
288
207
 
289
- abstract: str
290
- """The abstract is a concise summary of the academic paper's main findings."""
291
208
  chapters: List[T]
292
209
  """Chapters of the article. Contains at least one chapter. You can also add more as needed."""
293
210
 
@@ -317,34 +234,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, Language, ABC):
317
234
  yield sec
318
235
  yield from sec.subsections
319
236
 
320
- def iter_support_on(self, rev: bool = False) -> Generator[ArticleRef, None, None]:
321
- """Iterates over all references that the article components support.
322
-
323
- Args:
324
- rev (bool): If True, iterate in reverse order.
325
-
326
- Yields:
327
- ArticleRef: Each reference that the article components support.
328
- """
329
- if rev:
330
- yield from chain(*[a.support_to for a in self.iter_dfs_rev()])
331
- return
332
- yield from chain(*[a.support_to for a in self.iter_dfs()])
333
-
334
- def iter_depend_on(self, rev: bool = False) -> Generator[ArticleRef, None, None]:
335
- """Iterates over all references that the article components depend on.
336
-
337
- Args:
338
- rev (bool): If True, iterate in reverse order.
339
-
340
- Yields:
341
- ArticleRef: Each reference that the article components depend on.
342
- """
343
- if rev:
344
- yield from chain(*[a.depend_on for a in self.iter_dfs_rev()])
345
- return
346
- yield from chain(*[a.depend_on for a in self.iter_dfs()])
347
-
348
237
  def iter_sections(self) -> Generator[Tuple[ChapterBase, SectionBase], None, None]:
349
238
  """Iterates through all sections in the article.
350
239
 
@@ -378,12 +267,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, Language, ABC):
378
267
  """Gathers all introspected components in the article structure."""
379
268
  return "\n".join([i for component in self.chapters if (i := component.introspect())])
380
269
 
381
- @overload
382
- def find_illegal_ref(self, gather_identical: bool) -> Optional[Tuple[ArticleRef | List[ArticleRef], str]]: ...
383
-
384
- @overload
385
- def find_illegal_ref(self) -> Optional[Tuple[ArticleRef, str]]: ...
386
-
387
270
  def iter_chap_title(self) -> Generator[str, None, None]:
388
271
  """Iterates through all chapter titles in the article."""
389
272
  for chap in self.chapters:
@@ -399,75 +282,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, Language, ABC):
399
282
  for _, _, subsec in self.iter_subsections():
400
283
  yield subsec.title
401
284
 
402
- def find_illegal_ref(self, gather_identical: bool = False) -> Optional[Tuple[ArticleRef | List[ArticleRef], str]]:
403
- """Finds the first illegal component in the outline.
404
-
405
- Returns:
406
- Tuple[ArticleOutlineBase, str]: A tuple containing the illegal component and an error message.
407
- """
408
- summary = ""
409
- chap_titles_set = set(self.iter_chap_title())
410
- sec_titles_set = set(self.iter_section_title())
411
- subsec_titles_set = set(self.iter_subsection_title())
412
-
413
- for component in self.iter_dfs_rev():
414
- for ref in chain(component.depend_on, component.support_to):
415
- if not ref.deref(self):
416
- summary += f"Invalid internal reference in `{component.__class__.__name__}` titled `{component.title}`, because the referred {ref.referring_type} is not exists within the article, see the original obj dump: {ref.model_dump()}\n"
417
-
418
- if ref.referred_chapter_title not in (chap_titles_set):
419
- summary += f"Chapter titled `{ref.referred_chapter_title}` is not any of {chap_titles_set}\n"
420
- if ref.referred_section_title and ref.referred_section_title not in (sec_titles_set):
421
- summary += f"Section Titled `{ref.referred_section_title}` is not any of {sec_titles_set}\n"
422
- if ref.referred_subsection_title and ref.referred_subsection_title not in (subsec_titles_set):
423
- summary += (
424
- f"Subsection Titled `{ref.referred_subsection_title}` is not any of {subsec_titles_set}"
425
- )
426
-
427
- if summary:
428
- return (
429
- (
430
- [
431
- identical_ref
432
- for identical_ref in chain(self.iter_depend_on(), self.iter_support_on())
433
- if identical_ref == ref
434
- ],
435
- summary,
436
- )
437
- if gather_identical
438
- else (ref, summary)
439
- )
440
-
441
- return None
442
-
443
- def gather_illegal_ref(self) -> Tuple[List[ArticleRef], str]:
444
- """Gathers all illegal references in the article."""
445
- summary = []
446
- chap_titles_set = set(self.iter_chap_title())
447
- sec_titles_set = set(self.iter_section_title())
448
- subsec_titles_set = set(self.iter_subsection_title())
449
- res_seq = []
450
-
451
- for component in self.iter_dfs():
452
- for ref in (
453
- r for r in chain(component.depend_on, component.support_to) if not r.deref(self) and r not in res_seq
454
- ):
455
- res_seq.append(ref)
456
- if ref.referred_chapter_title not in chap_titles_set:
457
- summary.append(
458
- f"Chapter titled `{ref.referred_chapter_title}` is not exist, since it is not any of {chap_titles_set}."
459
- )
460
- if ref.referred_section_title and (ref.referred_section_title not in sec_titles_set):
461
- summary.append(
462
- f"Section Titled `{ref.referred_section_title}` is not exist, since it is not any of {sec_titles_set}"
463
- )
464
- if ref.referred_subsection_title and (ref.referred_subsection_title not in subsec_titles_set):
465
- summary.append(
466
- f"Subsection Titled `{ref.referred_subsection_title}` is not exist, since it is not any of {subsec_titles_set}"
467
- )
468
-
469
- return res_seq, "\n".join(summary)
470
-
471
285
  def finalized_dump(self) -> str:
472
286
  """Generates standardized hierarchical markup for academic publishing systems.
473
287
 
@@ -1,8 +1,9 @@
1
1
  """ArticleEssence: Semantic fingerprint of academic paper for structured analysis."""
2
2
 
3
- from typing import List, Self
3
+ from typing import List
4
4
 
5
- from fabricatio.models.generic import Display, PersistentAble, ProposedAble, Vectorizable
5
+ from fabricatio.models.extra.rag import MilvusDataBase
6
+ from fabricatio.models.generic import PersistentAble, SketchedAble
6
7
  from pydantic import BaseModel
7
8
 
8
9
 
@@ -54,7 +55,7 @@ class Highlightings(BaseModel):
54
55
  """
55
56
 
56
57
 
57
- class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
58
+ class ArticleEssence(SketchedAble, PersistentAble, MilvusDataBase):
58
59
  """Structured representation of a scientific article's core elements in its original language."""
59
60
 
60
61
  language: str
@@ -93,7 +94,7 @@ class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
93
94
  bibtex_cite_key: str
94
95
  """Bibtex cite key of the original article."""
95
96
 
96
- def update_cite_key(self, new_cite_key: str) -> Self:
97
- """Update the bibtex_cite_key of the article."""
98
- self.bibtex_cite_key = new_cite_key
99
- return self
97
+ def _prepare_vectorization_inner(self) -> str:
98
+ return self.compact()
99
+
100
+