fabricatio 0.2.9.dev4__cp312-cp312-win_amd64.whl → 0.2.10__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. fabricatio/actions/article.py +20 -106
  2. fabricatio/actions/article_rag.py +153 -22
  3. fabricatio/actions/fs.py +25 -0
  4. fabricatio/actions/output.py +17 -3
  5. fabricatio/actions/rag.py +40 -18
  6. fabricatio/actions/rules.py +14 -3
  7. fabricatio/capabilities/check.py +2 -1
  8. fabricatio/capabilities/rag.py +41 -231
  9. fabricatio/config.py +4 -2
  10. fabricatio/constants.py +20 -0
  11. fabricatio/decorators.py +23 -0
  12. fabricatio/models/adv_kwargs_types.py +35 -0
  13. fabricatio/models/events.py +6 -6
  14. fabricatio/models/extra/advanced_judge.py +2 -2
  15. fabricatio/models/extra/aricle_rag.py +170 -0
  16. fabricatio/models/extra/article_base.py +2 -186
  17. fabricatio/models/extra/article_essence.py +8 -7
  18. fabricatio/models/extra/article_main.py +39 -107
  19. fabricatio/models/extra/problem.py +12 -17
  20. fabricatio/models/extra/rag.py +98 -0
  21. fabricatio/models/extra/rule.py +1 -2
  22. fabricatio/models/generic.py +35 -12
  23. fabricatio/models/kwargs_types.py +8 -36
  24. fabricatio/models/task.py +3 -3
  25. fabricatio/models/usages.py +80 -6
  26. fabricatio/rust.cp312-win_amd64.pyd +0 -0
  27. fabricatio/rust.pyi +138 -6
  28. fabricatio/utils.py +62 -4
  29. fabricatio-0.2.10.data/scripts/tdown.exe +0 -0
  30. {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dist-info}/METADATA +1 -4
  31. fabricatio-0.2.10.dist-info/RECORD +64 -0
  32. fabricatio/models/utils.py +0 -148
  33. fabricatio-0.2.9.dev4.data/scripts/tdown.exe +0 -0
  34. fabricatio-0.2.9.dev4.dist-info/RECORD +0 -61
  35. {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dist-info}/WHEEL +0 -0
  36. {fabricatio-0.2.9.dev4.dist-info → fabricatio-0.2.10.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,170 @@
1
+ """A Module containing the article rag models."""
2
+
3
+ from pathlib import Path
4
+ from typing import ClassVar, Dict, List, Optional, Self, Unpack
5
+
6
+ from fabricatio.rust import BibManager, split_into_chunks, is_chinese
7
+ from more_itertools.recipes import flatten
8
+ from pydantic import Field
9
+
10
+ from fabricatio.fs import safe_text_read
11
+ from fabricatio.journal import logger
12
+ from fabricatio.models.extra.article_main import ArticleSubsection
13
+ from fabricatio.models.extra.rag import MilvusDataBase
14
+ from fabricatio.models.generic import AsPrompt
15
+ from fabricatio.models.kwargs_types import ChunkKwargs
16
+ from fabricatio.utils import ok, wrapp_in_block
17
+
18
+
19
+ class ArticleChunk(MilvusDataBase, AsPrompt):
20
+ """The chunk of an article."""
21
+
22
+ etc_word: ClassVar[str] = "等"
23
+ and_word: ClassVar[str] = "与"
24
+ _cite_number: Optional[int] = None
25
+
26
+ head_split: ClassVar[List[str]] = [
27
+ "引 言",
28
+ "引言",
29
+ "绪 论",
30
+ "绪论",
31
+ "前言",
32
+ "INTRODUCTION",
33
+ "Introduction",
34
+ ]
35
+ tail_split: ClassVar[List[str]] = [
36
+ "参 考 文 献",
37
+ "参 考 文 献",
38
+ "参考文献",
39
+ "REFERENCES",
40
+ "References",
41
+ "Bibliography",
42
+ "Reference",
43
+ ]
44
+ chunk: str
45
+ """The segment of the article"""
46
+ year: int
47
+ """The year of the article"""
48
+ authors: List[str] = Field(default_factory=list)
49
+ """The authors of the article"""
50
+ article_title: str
51
+ """The title of the article"""
52
+ bibtex_cite_key: str
53
+ """The bibtex cite key of the article"""
54
+
55
+ def _as_prompt_inner(self) -> Dict[str, str]:
56
+ return {
57
+ f"{ok(self._cite_number, 'You need to update cite number first.')}th reference `{self.article_title}`": f"{wrapp_in_block(self.chunk, 'Referring Content')}\n"
58
+ f"Authors: {';'.join(self.authors)}\n"
59
+ f"Published Year: {self.year}\n"
60
+ }
61
+
62
+ def _prepare_vectorization_inner(self) -> str:
63
+ return self.chunk
64
+
65
+ @classmethod
66
+ def from_file[P: str | Path](
67
+ cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
68
+ ) -> List[Self]:
69
+ """Load the article chunks from the file."""
70
+ if isinstance(path, list):
71
+ result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
72
+ logger.debug(f"Number of chunks created from list of files: {len(result)}")
73
+ return result
74
+
75
+ return cls._from_file_inner(path, bib_mgr, **kwargs)
76
+
77
+ @classmethod
78
+ def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
79
+ path = Path(path)
80
+
81
+ title_seg = path.stem.split(" - ").pop()
82
+
83
+ key = (
84
+ bib_mgr.get_cite_key_by_title(title_seg)
85
+ or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
86
+ or bib_mgr.get_cite_key_fuzzy(path.stem)
87
+ )
88
+ if key is None:
89
+ logger.warning(f"no cite key found for {path.as_posix()}, skip.")
90
+ return []
91
+ authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
92
+ year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
93
+ article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
94
+
95
+ result = [
96
+ cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
97
+ for c in split_into_chunks(cls.strip(safe_text_read(path)), **kwargs)
98
+ ]
99
+ logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
100
+ return result
101
+
102
+ @classmethod
103
+ def strip(cls, string: str) -> str:
104
+ """Strip the head and tail of the string."""
105
+ logger.debug(f"String length before strip: {(original := len(string))}")
106
+ for split in (s for s in cls.head_split if s in string):
107
+ logger.debug(f"Strip head using {split}")
108
+ parts = string.split(split)
109
+ string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
110
+ break
111
+ logger.debug(
112
+ f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
113
+ )
114
+ if not d:
115
+ logger.warning("No decrease at head strip, which is might be abnormal.")
116
+ for split in (s for s in cls.tail_split if s in string):
117
+ logger.debug(f"Strip tail using {split}")
118
+ parts = string.split(split)
119
+ string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
120
+ break
121
+ logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
122
+ if not d:
123
+ logger.warning("No decrease at tail strip, which is might be abnormal.")
124
+
125
+ return string
126
+
127
+ def as_typst_cite(self) -> str:
128
+ """As typst cite."""
129
+ return f"#cite(<{self.bibtex_cite_key}>)"
130
+
131
+ @property
132
+ def auther_firstnames(self) -> List[str]:
133
+ """Get the first name of the authors."""
134
+ ret = []
135
+ for n in self.authors:
136
+ if is_chinese(n):
137
+ ret.append(n[0])
138
+ else:
139
+ ret.append(n.split()[-1])
140
+ return ret
141
+
142
+ def as_auther_seq(self) -> str:
143
+ """Get the auther sequence."""
144
+ match len(self.authors):
145
+ case 0:
146
+ raise ValueError("No authors found")
147
+ case 1:
148
+ return f"({self.auther_firstnames[0]},{self.year}){self.as_typst_cite()}"
149
+ case 2:
150
+ return f"({self.auther_firstnames[0]}{self.and_word}{self.auther_firstnames[1]},{self.year}){self.as_typst_cite()}"
151
+ case 3:
152
+ return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]},{self.year}){self.as_typst_cite()}"
153
+ case _:
154
+ return f"({self.auther_firstnames[0]},{self.auther_firstnames[1]}{self.and_word}{self.auther_firstnames[2]}{self.etc_word},{self.year}){self.as_typst_cite()}"
155
+
156
+ def update_cite_number(self, cite_number: int) -> Self:
157
+ """Update the cite number."""
158
+ self._cite_number = cite_number
159
+ return self
160
+
161
+ def replace_cite(self, string: str, left_char: str = "[[", right_char: str = "]]") -> str:
162
+ """Replace the cite number in the string."""
163
+ return string.replace(f"{left_char}{ok(self._cite_number)}{right_char}", self.as_auther_seq())
164
+
165
+ def apply(self, article_subsection: ArticleSubsection) -> ArticleSubsection:
166
+ """Apply the patch to the article subsection."""
167
+ for p in article_subsection.paragraphs:
168
+ p.content = self.replace_cite(p.content)
169
+
170
+ return article_subsection
@@ -2,8 +2,7 @@
2
2
 
3
3
  from abc import ABC, abstractmethod
4
4
  from enum import StrEnum
5
- from itertools import chain
6
- from typing import Generator, List, Optional, Self, Tuple, overload
5
+ from typing import Generator, List, Optional, Self, Tuple
7
6
 
8
7
  from fabricatio.models.generic import (
9
8
  AsPrompt,
@@ -15,7 +14,6 @@ from fabricatio.models.generic import (
15
14
  PersistentAble,
16
15
  ProposedUpdateAble,
17
16
  ResolveUpdateConflict,
18
- SequencePatch,
19
17
  SketchedAble,
20
18
  Titled,
21
19
  WordCount,
@@ -31,81 +29,9 @@ class ReferringType(StrEnum):
31
29
  SUBSECTION = "subsection"
32
30
 
33
31
 
34
- type RefKey = Tuple[str, Optional[str], Optional[str]]
35
-
36
-
37
- class ArticleRef(ProposedUpdateAble):
38
- """Reference to a specific chapter, section or subsection within the article. You SHALL not refer to an article component that is external and not present within our own article.
39
-
40
- Examples:
41
- - Referring to a chapter titled `Introduction`:
42
- Using Python
43
- ```python
44
- ArticleRef(chap="Introduction")
45
- ```
46
- Using JSON
47
- ```json
48
- {chap="Introduction"}
49
- ```
50
- - Referring to a section titled `Background` under the `Introduction` chapter:
51
- Using Python
52
- ```python
53
- ArticleRef(chap="Introduction", sec="Background")
54
- ```
55
- Using JSON
56
- ```json
57
- {chap="Introduction", sec="Background"}
58
- ```
59
- - Referring to a subsection titled `Related Work` under the `Background` section of the `Introduction` chapter:
60
- Using Python
61
- ```python
62
- ArticleRef(chap="Introduction", sec="Background", subsec="Related Work")
63
- ```
64
- Using JSON
65
- ```json
66
- {chap="Introduction", sec="Background", subsec="Related Work"}
67
- ```
68
- """
69
-
70
- chap: str
71
- """`title` Field of the referenced chapter"""
72
- sec: Optional[str] = None
73
- """`title` Field of the referenced section."""
74
- subsec: Optional[str] = None
75
- """`title` Field of the referenced subsection."""
76
-
77
- def update_from_inner(self, other: Self) -> Self:
78
- """Updates the current instance with the attributes of another instance."""
79
- self.chap = other.chap
80
- self.sec = other.sec
81
- self.subsec = other.subsec
82
- return self
83
-
84
- def deref(self, article: "ArticleBase") -> Optional["ArticleOutlineBase"]:
85
- """Dereference the reference to the actual section or subsection within the provided article.
86
-
87
- Args:
88
- article (ArticleOutline | Article): The article to dereference the reference from.
89
32
 
90
- Returns:
91
- ArticleMainBase | ArticleOutline | None: The dereferenced section or subsection, or None if not found.
92
- """
93
- chap = next((chap for chap in article.chapters if chap.title == self.chap), None)
94
- if self.sec is None or chap is None:
95
- return chap
96
- sec = next((sec for sec in chap.sections if sec.title == self.sec), None)
97
- if self.subsec is None or sec is None:
98
- return sec
99
- return next((subsec for subsec in sec.subsections if subsec.title == self.subsec), None)
33
+ type RefKey = Tuple[str, Optional[str], Optional[str]]
100
34
 
101
- @property
102
- def referring_type(self) -> ReferringType:
103
- """Determine the type of reference based on the presence of specific attributes."""
104
- if self.subsec is not None:
105
- return ReferringType.SUBSECTION
106
- if self.sec is not None:
107
- return ReferringType.SECTION
108
- return ReferringType.CHAPTER
109
35
 
110
36
 
111
37
  class ArticleMetaData(SketchedAble, Described, WordCount, Titled, Language):
@@ -121,15 +47,8 @@ class ArticleMetaData(SketchedAble, Described, WordCount, Titled, Language):
121
47
  aims: List[str]
122
48
  """List of writing aims of the research component in academic style."""
123
49
 
124
- support_to: List[ArticleRef]
125
- """List of references to other future components in this article that this component supports to."""
126
- depend_on: List[ArticleRef]
127
- """List of references to other previous components in this article that this component depends on."""
128
50
 
129
51
 
130
- class ArticleRefSequencePatch(SequencePatch[ArticleRef]):
131
- """Patch for article refs."""
132
-
133
52
 
134
53
  class ArticleOutlineBase(
135
54
  ArticleMetaData,
@@ -148,10 +67,6 @@ class ArticleOutlineBase(
148
67
 
149
68
  def update_metadata(self, other: ArticleMetaData) -> Self:
150
69
  """Updates the metadata of the current instance with the attributes of another instance."""
151
- self.support_to.clear()
152
- self.support_to.extend(other.support_to)
153
- self.depend_on.clear()
154
- self.depend_on.extend(other.depend_on)
155
70
  self.aims.clear()
156
71
  self.aims.extend(other.aims)
157
72
  self.description = other.description
@@ -319,34 +234,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, WordCount, Descri
319
234
  yield sec
320
235
  yield from sec.subsections
321
236
 
322
- def iter_support_on(self, rev: bool = False) -> Generator[ArticleRef, None, None]:
323
- """Iterates over all references that the article components support.
324
-
325
- Args:
326
- rev (bool): If True, iterate in reverse order.
327
-
328
- Yields:
329
- ArticleRef: Each reference that the article components support.
330
- """
331
- if rev:
332
- yield from chain(*[a.support_to for a in self.iter_dfs_rev()])
333
- return
334
- yield from chain(*[a.support_to for a in self.iter_dfs()])
335
-
336
- def iter_depend_on(self, rev: bool = False) -> Generator[ArticleRef, None, None]:
337
- """Iterates over all references that the article components depend on.
338
-
339
- Args:
340
- rev (bool): If True, iterate in reverse order.
341
-
342
- Yields:
343
- ArticleRef: Each reference that the article components depend on.
344
- """
345
- if rev:
346
- yield from chain(*[a.depend_on for a in self.iter_dfs_rev()])
347
- return
348
- yield from chain(*[a.depend_on for a in self.iter_dfs()])
349
-
350
237
  def iter_sections(self) -> Generator[Tuple[ChapterBase, SectionBase], None, None]:
351
238
  """Iterates through all sections in the article.
352
239
 
@@ -380,12 +267,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, WordCount, Descri
380
267
  """Gathers all introspected components in the article structure."""
381
268
  return "\n".join([i for component in self.chapters if (i := component.introspect())])
382
269
 
383
- @overload
384
- def find_illegal_ref(self, gather_identical: bool) -> Optional[Tuple[ArticleRef | List[ArticleRef], str]]: ...
385
-
386
- @overload
387
- def find_illegal_ref(self) -> Optional[Tuple[ArticleRef, str]]: ...
388
-
389
270
  def iter_chap_title(self) -> Generator[str, None, None]:
390
271
  """Iterates through all chapter titles in the article."""
391
272
  for chap in self.chapters:
@@ -401,71 +282,6 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, WordCount, Descri
401
282
  for _, _, subsec in self.iter_subsections():
402
283
  yield subsec.title
403
284
 
404
- def find_illegal_ref(self, gather_identical: bool = False) -> Optional[Tuple[ArticleRef | List[ArticleRef], str]]:
405
- """Finds the first illegal component in the outline.
406
-
407
- Returns:
408
- Tuple[ArticleOutlineBase, str]: A tuple containing the illegal component and an error message.
409
- """
410
- summary = ""
411
- chap_titles_set = set(self.iter_chap_title())
412
- sec_titles_set = set(self.iter_section_title())
413
- subsec_titles_set = set(self.iter_subsection_title())
414
-
415
- for component in self.iter_dfs_rev():
416
- for ref in chain(component.depend_on, component.support_to):
417
- if not ref.deref(self):
418
- summary += f"Invalid internal reference in `{component.__class__.__name__}` titled `{component.title}`, because the referred {ref.referring_type} is not exists within the article, see the original obj dump: {ref.model_dump()}\n"
419
-
420
- if ref.chap not in (chap_titles_set):
421
- summary += f"Chapter titled `{ref.chap}` is not any of {chap_titles_set}\n"
422
- if ref.sec and ref.sec not in (sec_titles_set):
423
- summary += f"Section Titled `{ref.sec}` is not any of {sec_titles_set}\n"
424
- if ref.subsec and ref.subsec not in (subsec_titles_set):
425
- summary += f"Subsection Titled `{ref.subsec}` is not any of {subsec_titles_set}"
426
-
427
- if summary:
428
- return (
429
- (
430
- [
431
- identical_ref
432
- for identical_ref in chain(self.iter_depend_on(), self.iter_support_on())
433
- if identical_ref == ref
434
- ],
435
- summary,
436
- )
437
- if gather_identical
438
- else (ref, summary)
439
- )
440
-
441
- return None
442
-
443
- def gather_illegal_ref(self) -> Tuple[List[ArticleRef], str]:
444
- """Gathers all illegal references in the article."""
445
- summary = []
446
- chap_titles_set = set(self.iter_chap_title())
447
- sec_titles_set = set(self.iter_section_title())
448
- subsec_titles_set = set(self.iter_subsection_title())
449
- res_seq = []
450
-
451
- for component in self.iter_dfs():
452
- for ref in (
453
- r for r in chain(component.depend_on, component.support_to) if not r.deref(self) and r not in res_seq
454
- ):
455
- res_seq.append(ref)
456
- if ref.chap not in chap_titles_set:
457
- summary.append(
458
- f"Chapter titled `{ref.chap}` is not exist, since it is not any of {chap_titles_set}."
459
- )
460
- if ref.sec and (ref.sec not in sec_titles_set):
461
- summary.append(f"Section Titled `{ref.sec}` is not exist, since it is not any of {sec_titles_set}")
462
- if ref.subsec and (ref.subsec not in subsec_titles_set):
463
- summary.append(
464
- f"Subsection Titled `{ref.subsec}` is not exist, since it is not any of {subsec_titles_set}"
465
- )
466
-
467
- return res_seq, "\n".join(summary)
468
-
469
285
  def finalized_dump(self) -> str:
470
286
  """Generates standardized hierarchical markup for academic publishing systems.
471
287
 
@@ -1,8 +1,9 @@
1
1
  """ArticleEssence: Semantic fingerprint of academic paper for structured analysis."""
2
2
 
3
- from typing import List, Self
3
+ from typing import List
4
4
 
5
- from fabricatio.models.generic import Display, PersistentAble, ProposedAble, Vectorizable
5
+ from fabricatio.models.extra.rag import MilvusDataBase
6
+ from fabricatio.models.generic import PersistentAble, SketchedAble
6
7
  from pydantic import BaseModel
7
8
 
8
9
 
@@ -54,7 +55,7 @@ class Highlightings(BaseModel):
54
55
  """
55
56
 
56
57
 
57
- class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
58
+ class ArticleEssence(SketchedAble, PersistentAble, MilvusDataBase):
58
59
  """Structured representation of a scientific article's core elements in its original language."""
59
60
 
60
61
  language: str
@@ -93,7 +94,7 @@ class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
93
94
  bibtex_cite_key: str
94
95
  """Bibtex cite key of the original article."""
95
96
 
96
- def update_cite_key(self, new_cite_key: str) -> Self:
97
- """Update the bibtex_cite_key of the article."""
98
- self.bibtex_cite_key = new_cite_key
99
- return self
97
+ def _prepare_vectorization_inner(self) -> str:
98
+ return self.compact()
99
+
100
+
@@ -1,13 +1,14 @@
1
1
  """ArticleBase and ArticleSubsection classes for managing hierarchical document components."""
2
2
 
3
- from itertools import chain
4
3
  from typing import Dict, Generator, List, Self, Tuple, override
5
4
 
5
+ from fabricatio.rust import word_count, convert_all_block_tex, convert_all_inline_tex
6
+ from pydantic import Field
7
+
6
8
  from fabricatio.fs.readers import extract_sections
7
9
  from fabricatio.journal import logger
8
10
  from fabricatio.models.extra.article_base import (
9
11
  ArticleBase,
10
- ArticleOutlineBase,
11
12
  ChapterBase,
12
13
  SectionBase,
13
14
  SubSectionBase,
@@ -16,9 +17,6 @@ from fabricatio.models.extra.article_outline import (
16
17
  ArticleOutline,
17
18
  )
18
19
  from fabricatio.models.generic import Described, PersistentAble, SequencePatch, SketchedAble, WithRef, WordCount
19
- from fabricatio.rust import detect_language, word_count
20
- from fabricatio.utils import ok
21
- from pydantic import Field
22
20
 
23
21
  PARAGRAPH_SEP = "// - - -"
24
22
 
@@ -66,10 +64,11 @@ class ArticleSubsection(SubSectionBase):
66
64
  summary = ""
67
65
  if len(self.paragraphs) == 0:
68
66
  summary += f"`{self.__class__.__name__}` titled `{self.title}` have no paragraphs, You should add some!\n"
69
- if abs((wc := self.word_count) - self.expected_word_count) / self.expected_word_count > self._max_word_count_deviation:
70
- summary += (
71
- f"`{self.__class__.__name__}` titled `{self.title}` have {wc} words, expected {self.word_count} words!"
72
- )
67
+ if (
68
+ abs((wc := self.word_count) - self.expected_word_count) / self.expected_word_count
69
+ > self._max_word_count_deviation
70
+ ):
71
+ summary += f"`{self.__class__.__name__}` titled `{self.title}` have {wc} words, expected {self.expected_word_count} words!"
73
72
 
74
73
  return summary
75
74
 
@@ -90,17 +89,14 @@ class ArticleSubsection(SubSectionBase):
90
89
  return f"=== {self.title}\n" + f"\n{PARAGRAPH_SEP}\n".join(p.content for p in self.paragraphs)
91
90
 
92
91
  @classmethod
93
- def from_typst_code(cls, title: str, body: str, language: str) -> Self:
92
+ def from_typst_code(cls, title: str, body: str) -> Self:
94
93
  """Creates an Article object from the given Typst code."""
95
94
  return cls(
96
95
  heading=title,
97
96
  elaboration="",
98
97
  paragraphs=[Paragraph.from_content(p) for p in body.split(PARAGRAPH_SEP)],
99
98
  expected_word_count=word_count(body),
100
- language=language,
101
99
  aims=[],
102
- support_to=[],
103
- depend_on=[],
104
100
  )
105
101
 
106
102
 
@@ -108,20 +104,16 @@ class ArticleSection(SectionBase[ArticleSubsection]):
108
104
  """Atomic argumentative unit with high-level specificity."""
109
105
 
110
106
  @classmethod
111
- def from_typst_code(cls, title: str, body: str, language: str) -> Self:
107
+ def from_typst_code(cls, title: str, body: str) -> Self:
112
108
  """Creates an Article object from the given Typst code."""
113
109
  return cls(
114
110
  subsections=[
115
- ArticleSubsection.from_typst_code(*pack, language=language)
116
- for pack in extract_sections(body, level=3, section_char="=")
111
+ ArticleSubsection.from_typst_code(*pack) for pack in extract_sections(body, level=3, section_char="=")
117
112
  ],
118
113
  heading=title,
119
114
  elaboration="",
120
115
  expected_word_count=word_count(body),
121
- language=language,
122
116
  aims=[],
123
- support_to=[],
124
- depend_on=[],
125
117
  )
126
118
 
127
119
 
@@ -129,20 +121,16 @@ class ArticleChapter(ChapterBase[ArticleSection]):
129
121
  """Thematic progression implementing research function."""
130
122
 
131
123
  @classmethod
132
- def from_typst_code(cls, title: str, body: str, language: str) -> Self:
124
+ def from_typst_code(cls, title: str, body: str) -> Self:
133
125
  """Creates an Article object from the given Typst code."""
134
126
  return cls(
135
127
  sections=[
136
- ArticleSection.from_typst_code(*pack, language=language)
137
- for pack in extract_sections(body, level=2, section_char="=")
128
+ ArticleSection.from_typst_code(*pack) for pack in extract_sections(body, level=2, section_char="=")
138
129
  ],
139
130
  heading=title,
140
131
  elaboration="",
141
132
  expected_word_count=word_count(body),
142
- language=language,
143
133
  aims=[],
144
- support_to=[],
145
- depend_on=[],
146
134
  )
147
135
 
148
136
 
@@ -166,6 +154,22 @@ class Article(
166
154
  "Original Article": self.display(),
167
155
  }
168
156
 
157
+ def convert_tex(self) -> Self:
158
+ """Convert tex to typst code"""
159
+ for _, _, subsec in self.iter_subsections():
160
+ for p in subsec.paragraphs:
161
+ p.content = convert_all_inline_tex(p.content)
162
+ p.content = convert_all_block_tex(p.content)
163
+ return self
164
+
165
+ def fix_wrapper(self) -> Self:
166
+ """Fix wrapper"""
167
+ for _, _, subsec in self.iter_subsections():
168
+ for p in subsec.paragraphs:
169
+ p.content = p.content.replace(r" \( ", "$").replace(r" \) ", "$").replace("\\[\n", "$$\n").replace(
170
+ "\n\\]", "\n$$")
171
+ return self
172
+
169
173
  @override
170
174
  def iter_subsections(self) -> Generator[Tuple[ArticleChapter, ArticleSection, ArticleSubsection], None, None]:
171
175
  return super().iter_subsections() # pyright: ignore [reportReturnType]
@@ -210,92 +214,20 @@ class Article(
210
214
  def from_typst_code(cls, title: str, body: str) -> Self:
211
215
  """Generates an article from the given Typst code."""
212
216
  return cls(
213
- language=(lang := detect_language(body)),
214
217
  chapters=[
215
- ArticleChapter.from_typst_code(*pack, language=lang)
216
- for pack in extract_sections(body, level=1, section_char="=")
218
+ ArticleChapter.from_typst_code(*pack) for pack in extract_sections(body, level=1, section_char="=")
217
219
  ],
218
220
  heading=title,
219
221
  expected_word_count=word_count(body),
220
222
  abstract="",
221
223
  )
222
224
 
223
- def gather_dependencies(self, article: ArticleOutlineBase) -> List[ArticleOutlineBase]:
224
- """Gathers dependencies for all sections and subsections in the article.
225
-
226
- This method should be called after the article is fully constructed.
227
- """
228
- depends = [ok(a.deref(self)) for a in article.depend_on]
229
-
230
- supports = []
231
- for a in self.iter_dfs_rev():
232
- if article in {ok(b.deref(self)) for b in a.support_to}:
233
- supports.append(a)
234
-
235
- return list(set(depends + supports))
236
-
237
- def gather_dependencies_recursive(self, article: ArticleOutlineBase) -> List[ArticleOutlineBase]:
238
- """Gathers all dependencies recursively for the given article.
239
-
240
- Args:
241
- article (ArticleOutlineBase): The article to gather dependencies for.
242
-
243
- Returns:
244
- List[ArticleBase]: A list of all dependencies for the given article.
245
- """
246
- q = self.gather_dependencies(article)
247
-
248
- deps = []
249
- while q:
250
- a = q.pop()
251
- deps.extend(self.gather_dependencies(a))
252
-
253
- deps = list(
254
- chain(
255
- filter(lambda x: isinstance(x, ArticleChapter), deps),
256
- filter(lambda x: isinstance(x, ArticleSection), deps),
257
- filter(lambda x: isinstance(x, ArticleSubsection), deps),
258
- )
259
- )
260
-
261
- # Initialize result containers
262
- formatted_code = ""
263
- processed_components = []
264
-
265
- # Process all dependencies
266
- while deps:
267
- component = deps.pop()
268
- # Skip duplicates
269
- if (component_code := component.to_typst_code()) in formatted_code:
270
- continue
271
-
272
- # Add this component
273
- formatted_code += component_code
274
- processed_components.append(component)
275
-
276
- return processed_components
277
-
278
- def iter_dfs_with_deps(
279
- self, chapter: bool = True, section: bool = True, subsection: bool = True
280
- ) -> Generator[Tuple[ArticleOutlineBase, List[ArticleOutlineBase]], None, None]:
281
- """Iterates through the article in a depth-first manner, yielding each component and its dependencies.
282
-
283
- Args:
284
- chapter (bool, optional): Whether to include chapter components. Defaults to True.
285
- section (bool, optional): Whether to include section components. Defaults to True.
286
- subsection (bool, optional): Whether to include subsection components. Defaults to True.
287
-
288
- Yields:
289
- Tuple[ArticleBase, List[ArticleBase]]: Each component and its dependencies.
290
- """
291
- if all((not chapter, not section, not subsection)):
292
- raise ValueError("At least one of chapter, section, or subsection must be True.")
293
-
294
- for component in self.iter_dfs_rev():
295
- if not chapter and isinstance(component, ArticleChapter):
296
- continue
297
- if not section and isinstance(component, ArticleSection):
298
- continue
299
- if not subsection and isinstance(component, ArticleSubsection):
300
- continue
301
- yield component, (self.gather_dependencies_recursive(component))
225
+ @classmethod
226
+ def from_mixed_source(cls, article_outline: ArticleOutline, typst_code: str) -> Self:
227
+ """Generates an article from the given outline and Typst code."""
228
+ self = cls.from_typst_code(article_outline.title, typst_code)
229
+ self.expected_word_count = article_outline.expected_word_count
230
+ self.description = article_outline.description
231
+ for a, o in zip(self.iter_dfs(), article_outline.iter_dfs(), strict=True):
232
+ a.update_metadata(o)
233
+ return self.update_ref(article_outline)