fabricatio 0.2.13.dev1__cp312-cp312-manylinux_2_34_x86_64.whl → 0.2.13.dev3__cp312-cp312-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,18 +20,26 @@ from fabricatio.models.extra.article_main import Article, ArticleChapter, Articl
20
20
  from fabricatio.models.extra.article_outline import ArticleOutline
21
21
  from fabricatio.models.extra.rule import RuleSet
22
22
  from fabricatio.models.kwargs_types import ChooseKwargs, LLMKwargs
23
- from fabricatio.rust import convert_to_block_formula, convert_to_inline_formula
24
- from fabricatio.utils import ask_retain, ok
23
+ from fabricatio.rust import (
24
+ convert_all_block_tex,
25
+ convert_all_inline_tex,
26
+ convert_to_block_formula,
27
+ convert_to_inline_formula,
28
+ fix_misplaced_labels,
29
+ )
30
+ from fabricatio.utils import ok
25
31
 
26
32
  TYPST_CITE_USAGE = (
27
- "citation number is REQUIRED to cite any reference!,for example in Auther Pattern: 'Doe et al.[[1]], Jack et al.[[2]]' or in Sentence Suffix Sattern: 'Global requirement is incresing[[1]].'\n"
28
- "Everything is build upon the typst language, which is similar to latex, \n"
33
+ "citation number is REQUIRED to cite any reference!'\n"
29
34
  "Legal citing syntax examples(seperated by |): [[1]]|[[1,2]]|[[1-3]]|[[12,13-15]]|[[1-3,5-7]]\n"
30
35
  "Illegal citing syntax examples(seperated by |): [[1],[2],[3]]|[[1],[1-2]]\n"
31
- "Those reference mark shall not be omitted during the extraction\n"
36
+ "You SHALL not cite a single reference more than once!"
32
37
  "It's recommended to cite multiple references that supports your conclusion at a time.\n"
38
+ )
39
+
40
+ TYPST_MATH_USAGE = (
33
41
  "Wrap inline expression with '\\(' and '\\)',like '\\(>5m\\)' '\\(89%\\)', and wrap block equation with '\\[' and '\\]'.\n"
34
- "In addition to that, you can add a label outside the block equation which can be used as a cross reference identifier, the label is a string wrapped in `<` and `>` like `<energy-release-rate-equation>`.Note that the label string should be a summarizing title for the equation being labeled.\n"
42
+ "In addition to that, you can add a label outside the block equation which can be used as a cross reference identifier, the label is a string wrapped in `<` and `>` like `<energy-release-rate-equation>`.Note that the label string should be a summarizing title for the equation being labeled and should never be written within the formula block.\n"
35
43
  "you can refer to that label by using the syntax with prefix of `@eqt:`, which indicate that this notation is citing a label from the equations. For example ' @eqt:energy-release-rate-equation ' DO remember that the notation shall have both suffixed and prefixed space char which enable the compiler to distinguish the notation from the plaintext."
36
44
  "Below is two usage example:\n"
37
45
  "```typst\n"
@@ -44,9 +52,10 @@ TYPST_CITE_USAGE = (
44
52
  )
45
53
 
46
54
 
47
- class WriteArticleContentRAG(Action, RAG, Extract):
55
+ class WriteArticleContentRAG(Action, Extract, AdvancedRAG):
48
56
  """Write an article based on the provided outline."""
49
57
 
58
+ ctx_override: ClassVar[bool] = True
50
59
  search_increment_multiplier: float = 1.6
51
60
  """The increment multiplier of the search increment."""
52
61
  ref_limit: int = 35
@@ -55,15 +64,19 @@ class WriteArticleContentRAG(Action, RAG, Extract):
55
64
  """The threshold of relevance"""
56
65
  extractor_model: LLMKwargs
57
66
  """The model to use for extracting the content from the retrieved references."""
58
- query_model: LLMKwargs
67
+ query_model: ChooseKwargs
59
68
  """The model to use for querying the database"""
60
69
  supervisor: bool = False
61
70
  """Whether to use supervisor mode"""
62
71
  result_per_query: PositiveInt = 4
63
72
  """The number of results to be returned per query."""
64
- req: str = TYPST_CITE_USAGE
73
+ cite_req: str = TYPST_CITE_USAGE
65
74
  """The req of the write article content."""
66
75
 
76
+ math_req: str = TYPST_MATH_USAGE
77
+ """The req of the write article content."""
78
+ tei_endpoint: Optional[str] = None
79
+
67
80
  async def _execute(
68
81
  self,
69
82
  article_outline: ArticleOutline,
@@ -103,30 +116,25 @@ class WriteArticleContentRAG(Action, RAG, Extract):
103
116
  cm = CitationManager()
104
117
  await self.search_database(article, article_outline, chap, sec, subsec, cm)
105
118
 
106
- raw = await self.write_raw(article, article_outline, chap, sec, subsec, cm)
107
- r_print(raw)
119
+ raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm)
120
+ r_print(raw_paras)
108
121
 
109
122
  while not await confirm("Accept this version and continue?").ask_async():
110
123
  if inst := await text("Search for more refs for additional spec.").ask_async():
111
- await self.search_database(
112
- article,
113
- article_outline,
114
- chap,
115
- sec,
116
- subsec,
117
- cm,
118
- supervisor=True,
119
- extra_instruction=inst,
120
- )
124
+ await self.search_database(article, article_outline, chap, sec, subsec, cm, extra_instruction=inst)
121
125
 
122
126
  if instruction := await text("Enter the instructions to improve").ask_async():
123
- raw = await self.write_raw(article, article_outline, chap, sec, subsec, cm, instruction)
124
- if edt := await text("Edit", default=raw).ask_async():
125
- raw = edt
127
+ raw_paras = await self.write_raw(article, article_outline, chap, sec, subsec, cm, instruction)
128
+ if edt := await text("Edit", default=raw_paras).ask_async():
129
+ raw_paras = edt
126
130
 
127
- r_print(raw)
131
+ raw_paras = fix_misplaced_labels(raw_paras)
132
+ raw_paras = convert_all_inline_tex(raw_paras)
133
+ raw_paras = convert_all_block_tex(raw_paras)
128
134
 
129
- return await self.extract_new_subsec(subsec, raw, cm)
135
+ r_print(raw_paras)
136
+
137
+ return await self.extract_new_subsec(subsec, raw_paras, cm)
130
138
 
131
139
  async def _inner(
132
140
  self,
@@ -144,6 +152,10 @@ class WriteArticleContentRAG(Action, RAG, Extract):
144
152
 
145
153
  raw_paras = "\n".join(p for p in raw_paras.splitlines() if p and not p.endswith("**") and not p.startswith("#"))
146
154
 
155
+ raw_paras = fix_misplaced_labels(raw_paras)
156
+ raw_paras = convert_all_inline_tex(raw_paras)
157
+ raw_paras = convert_all_block_tex(raw_paras)
158
+
147
159
  return await self.extract_new_subsec(subsec, raw_paras, cm)
148
160
 
149
161
  async def extract_new_subsec(
@@ -155,12 +167,13 @@ class WriteArticleContentRAG(Action, RAG, Extract):
155
167
  ArticleSubsection,
156
168
  raw_paras,
157
169
  f"Above is the subsection titled `{subsec.title}`.\n"
158
- f"I need you to extract the content to update my subsection obj provided below.\n{self.req}"
159
- f"{subsec.display()}\n",
170
+ f"I need you to extract the content to construct a new `{ArticleSubsection.__class__.__name__}`,"
171
+ f"Do not attempt to change the original content, your job is ONLY content extraction",
160
172
  **self.extractor_model,
161
173
  ),
162
174
  "Failed to propose new subsection.",
163
175
  )
176
+
164
177
  for p in new_subsec.paragraphs:
165
178
  p.content = cm.apply(p.content)
166
179
  p.description = cm.apply(p.description)
@@ -184,7 +197,7 @@ class WriteArticleContentRAG(Action, RAG, Extract):
184
197
  f"{article_outline.finalized_dump()}\n\nAbove is my article outline, I m writing graduate thesis titled `{article.title}`. "
185
198
  f"More specifically, i m witting the Chapter `{chap.title}` >> Section `{sec.title}` >> Subsection `{subsec.title}`.\n"
186
199
  f"Please help me write the paragraphs of the subsec mentioned above, which is `{subsec.title}`.\n"
187
- f"{self.req}\n"
200
+ f"{self.cite_req}\n{self.math_req}\n"
188
201
  f"You SHALL use `{article.language}` as writing language.\n{extra_instruction}\n"
189
202
  f"Do not use numbered list to display the outcome, you should regard you are writing the main text of the thesis.\n"
190
203
  f"You should not copy others' works from the references directly on to my thesis, we can only harness the conclusion they have drawn.\n"
@@ -200,7 +213,6 @@ class WriteArticleContentRAG(Action, RAG, Extract):
200
213
  subsec: ArticleSubsection,
201
214
  cm: CitationManager,
202
215
  extra_instruction: str = "",
203
- supervisor: bool = False,
204
216
  ) -> None:
205
217
  """Search database for related references."""
206
218
  search_req = (
@@ -208,51 +220,20 @@ class WriteArticleContentRAG(Action, RAG, Extract):
208
220
  f"More specifically, i m witting the Chapter `{chap.title}` >> Section `{sec.title}` >> Subsection `{subsec.title}`.\n"
209
221
  f"I need to search related references to build up the content of the subsec mentioned above, which is `{subsec.title}`.\n"
210
222
  f"provide 10~16 queries as possible, to get best result!\n"
211
- f"You should provide both English version and chinese version of the refined queries!\n{extra_instruction}\n"
212
- )
213
-
214
- ref_q = ok(
215
- await self.arefined_query(
216
- search_req,
217
- **self.query_model,
218
- ),
219
- "Failed to refine query.",
223
+ f"You should provide both English version and chinese version of the refined queries!\n{extra_instruction}"
220
224
  )
221
225
 
222
- if supervisor:
223
- ref_q = await ask_retain(ref_q)
224
- ret = await self.aretrieve(
225
- ref_q,
226
- ArticleChunk,
227
- max_accepted=self.ref_limit,
226
+ await self.clued_search(
227
+ search_req,
228
+ cm,
229
+ refinery_kwargs=self.query_model,
230
+ expand_multiplier=self.search_increment_multiplier,
231
+ base_accepted=self.ref_limit,
228
232
  result_per_query=self.result_per_query,
229
233
  similarity_threshold=self.threshold,
234
+ tei_endpoint=self.tei_endpoint,
230
235
  )
231
236
 
232
- cm.add_chunks(ok(ret))
233
- ref_q = await self.arefined_query(
234
- f"{cm.as_prompt()}\n\nAbove is the retrieved references in the first RAG, now we need to perform the second RAG.\n\n{search_req}",
235
- **self.query_model,
236
- )
237
-
238
- if ref_q is None:
239
- logger.warning("Second refine query is None, skipping.")
240
- return
241
- if supervisor:
242
- ref_q = await ask_retain(ref_q)
243
-
244
- ret = await self.aretrieve(
245
- ref_q,
246
- ArticleChunk,
247
- max_accepted=int(self.ref_limit * self.search_increment_multiplier),
248
- result_per_query=int(self.result_per_query * self.search_increment_multiplier),
249
- similarity_threshold=self.threshold,
250
- )
251
- if ret is None:
252
- logger.warning("Second retrieve is None, skipping.")
253
- return
254
- cm.add_chunks(ret)
255
-
256
237
 
257
238
  class ArticleConsultRAG(Action, AdvancedRAG):
258
239
  """Write an article based on the provided outline."""
@@ -261,9 +242,9 @@ class ArticleConsultRAG(Action, AdvancedRAG):
261
242
  output_key: str = "consult_count"
262
243
  search_increment_multiplier: float = 1.6
263
244
  """The multiplier to increase the limit of references to retrieve per query."""
264
- ref_limit: int = 20
245
+ ref_limit: int = 26
265
246
  """The final limit of references."""
266
- ref_per_q: int = 3
247
+ ref_per_q: int = 13
267
248
  """The limit of references to retrieve per query."""
268
249
  similarity_threshold: float = 0.62
269
250
  """The similarity threshold of references to retrieve."""
@@ -7,6 +7,7 @@ from fabricatio.journal import logger
7
7
  from fabricatio.models.adv_kwargs_types import FetchKwargs
8
8
  from fabricatio.models.extra.aricle_rag import ArticleChunk, CitationManager
9
9
  from fabricatio.models.kwargs_types import ChooseKwargs
10
+ from fabricatio.utils import fallback_kwargs
10
11
 
11
12
 
12
13
  class AdvancedRAG(RAG):
@@ -40,10 +41,13 @@ class AdvancedRAG(RAG):
40
41
  f"\n\n{requirement}",
41
42
  **refinery_kwargs,
42
43
  )
44
+
43
45
  if ref_q is None:
44
46
  logger.error(f"At round [{i}/{max_round}] search, failed to refine the query, exit.")
45
47
  return cm
46
- refs = await self.aretrieve(ref_q, ArticleChunk, base_accepted, **kwargs)
48
+ refs = await self.aretrieve(
49
+ ref_q, ArticleChunk, base_accepted, **fallback_kwargs(kwargs, filter_expr=cm.as_milvus_filter_expr())
50
+ )
47
51
 
48
52
  if (max_capacity := max_capacity - len(refs)) < 0:
49
53
  cm.add_chunks(refs[0:max_capacity])
@@ -150,6 +150,7 @@ class RAG(EmbeddingUsage):
150
150
  result_per_query: int = 10,
151
151
  tei_endpoint: Optional[str] = None,
152
152
  reranker_threshold: float = 0.7,
153
+ filter_expr: str = "",
153
154
  ) -> List[D]:
154
155
  """Asynchronously fetches documents from a Milvus database based on input vectors.
155
156
 
@@ -162,6 +163,7 @@ class RAG(EmbeddingUsage):
162
163
  result_per_query (int): The maximum number of results to return per query. Defaults to 10.
163
164
  tei_endpoint (str): the endpoint of the TEI api.
164
165
  reranker_threshold (float): The threshold used to filtered low relativity document.
166
+ filter_expr (str): filter_expression parsed into pymilvus search.
165
167
 
166
168
  Returns:
167
169
  List[D]: A list of document objects created from the fetched data.
@@ -172,6 +174,7 @@ class RAG(EmbeddingUsage):
172
174
  await self.vectorize(query),
173
175
  search_params={"radius": similarity_threshold},
174
176
  output_fields=list(document_model.model_fields),
177
+ filter=filter_expr,
175
178
  limit=result_per_query,
176
179
  )
177
180
  if tei_endpoint is not None:
@@ -184,8 +187,11 @@ class RAG(EmbeddingUsage):
184
187
 
185
188
  for q, g in zip(query, search_results, strict=True):
186
189
  models = document_model.from_sequence([res["entity"] for res in g if res["id"] not in retrieved_id])
190
+ logger.debug(f"Retrived {len(g)} raw document, filtered out {len(models)}.")
187
191
  retrieved_id.update(res["id"] for res in g)
188
- rank_scores = await reranker.arerank(q, [m.prepare_vectorization() for m in models])
192
+ if not models:
193
+ continue
194
+ rank_scores = await reranker.arerank(q, [m.prepare_vectorization() for m in models], truncate=True)
189
195
  raw_result.extend(
190
196
  (models[s["index"]], s["score"]) for s in rank_scores if s["score"] > reranker_threshold
191
197
  )
@@ -60,3 +60,4 @@ if find_spec("pymilvus"):
60
60
  result_per_query: NotRequired[int]
61
61
  tei_endpoint: NotRequired[Optional[str]]
62
62
  reranker_threshold: NotRequired[float]
63
+ filter_expr: NotRequired[str]
@@ -1,6 +1,7 @@
1
1
  """A Module containing the article rag models."""
2
2
 
3
3
  import re
4
+ from itertools import groupby
4
5
  from pathlib import Path
5
6
  from typing import ClassVar, Dict, List, Optional, Self, Unpack
6
7
 
@@ -10,12 +11,13 @@ from fabricatio.models.extra.rag import MilvusDataBase
10
11
  from fabricatio.models.generic import AsPrompt
11
12
  from fabricatio.models.kwargs_types import ChunkKwargs
12
13
  from fabricatio.rust import BibManager, blake3_hash, split_into_chunks
13
- from fabricatio.utils import ok
14
+ from fabricatio.utils import ok, wrapp_in_block
15
+ from more_itertools.more import first
14
16
  from more_itertools.recipes import flatten, unique
15
17
  from pydantic import Field
16
18
 
17
19
 
18
- class ArticleChunk(MilvusDataBase, AsPrompt):
20
+ class ArticleChunk(MilvusDataBase):
19
21
  """The chunk of an article."""
20
22
 
21
23
  etc_word: ClassVar[str] = "等"
@@ -51,10 +53,9 @@ class ArticleChunk(MilvusDataBase, AsPrompt):
51
53
  bibtex_cite_key: str
52
54
  """The bibtex cite key of the article"""
53
55
 
54
- def _as_prompt_inner(self) -> Dict[str, str]:
55
- return {
56
- f"[[{ok(self._cite_number, 'You need to update cite number first.')}]] reference `{self.article_title}` from {self.as_auther_seq()}": self.chunk
57
- }
56
+ @property
57
+ def reference_header(self) -> str:
58
+ return f"[[{ok(self._cite_number, 'You need to update cite number first.')}]] reference `{self.article_title}` from {self.as_auther_seq()}"
58
59
 
59
60
  @property
60
61
  def cite_number(self) -> int:
@@ -204,13 +205,23 @@ class CitationManager(AsPrompt):
204
205
 
205
206
  def set_cite_number_all(self) -> Self:
206
207
  """Set citation numbers for all article chunks."""
207
- for i, a in enumerate(self.article_chunks, 1):
208
- a.update_cite_number(i)
208
+ number_mapping = {a.bibtex_cite_key: 0 for a in self.article_chunks}
209
+
210
+ for i, k in enumerate(number_mapping.keys()):
211
+ number_mapping[k] = i
212
+
213
+ for a in self.article_chunks:
214
+ a.update_cite_number(number_mapping[a.bibtex_cite_key])
209
215
  return self
210
216
 
211
217
  def _as_prompt_inner(self) -> Dict[str, str]:
212
218
  """Generate prompt inner representation."""
213
- return {"References": "\n".join(r.as_prompt() for r in self.article_chunks)}
219
+ seg = []
220
+ for k, g in groupby(self.article_chunks, key=lambda a: a.bibtex_cite_key):
221
+ g = list(g)
222
+ logger.debug(f"Group [{k}]: {len(g)}")
223
+ seg.append(wrapp_in_block("\n\n".join(a.chunk for a in g), first(g).reference_header))
224
+ return {"References": "\n".join(seg)}
214
225
 
215
226
  def apply(self, string: str) -> str:
216
227
  """Apply citation replacements to the input string."""
@@ -261,5 +272,9 @@ class CitationManager(AsPrompt):
261
272
 
262
273
  def unpack_cite_seq(self, citation_seq: List[int]) -> str:
263
274
  """Unpack citation sequence into a string."""
264
- chunk_seq = [a for a in self.article_chunks if a.cite_number in citation_seq]
265
- return "".join(a.as_typst_cite() for a in chunk_seq)
275
+ chunk_seq = {a.bibtex_cite_key: a for a in self.article_chunks if a.cite_number in citation_seq}
276
+ return "".join(a.as_typst_cite() for a in chunk_seq.values())
277
+
278
+ def as_milvus_filter_expr(self, blacklist: bool = True) -> str:
279
+ if blacklist:
280
+ return " and ".join(f'bibtex_cite_key != "{a.bibtex_cite_key}"' for a in self.article_chunks)
@@ -2,9 +2,12 @@
2
2
 
3
3
  from abc import ABC
4
4
  from enum import StrEnum
5
+ from pathlib import Path
5
6
  from typing import ClassVar, Generator, List, Optional, Self, Tuple, Type
6
7
 
8
+ from fabricatio.fs import dump_text, safe_text_read
7
9
  from fabricatio.fs.readers import extract_sections
10
+ from fabricatio.journal import logger
8
11
  from fabricatio.models.generic import (
9
12
  AsPrompt,
10
13
  Described,
@@ -19,10 +22,12 @@ from fabricatio.models.generic import (
19
22
  Titled,
20
23
  WordCount,
21
24
  )
22
- from fabricatio.rust import split_out_metadata, to_metadata, word_count
23
- from fabricatio.utils import fallback_kwargs
25
+ from fabricatio.rust import extract_body, inplace_update, split_out_metadata, to_metadata, word_count
26
+ from fabricatio.utils import fallback_kwargs, ok
24
27
  from pydantic import Field
25
28
 
29
+ ARTICLE_WRAPPER = "// =-=-=-=-=-=-=-=-=-="
30
+
26
31
 
27
32
  class ReferringType(StrEnum):
28
33
  """Enumeration of different types of references that can be made in an article."""
@@ -397,3 +402,23 @@ class ArticleBase[T: ChapterBase](FinalizedDumpAble, AsPrompt, FromTypstCode, To
397
402
  def avg_wordcount_recursive[S: "ArticleBase"](self: S) -> S:
398
403
  """Set all chap, sec, subsec have same word count sum up to be `self.expected_word_count`."""
399
404
  return self.avg_chap_wordcount().avg_sec_wordcount().avg_subsec_wordcount()
405
+
406
+ def update_article_file(self, file: str | Path) -> Self:
407
+ """Update the article file."""
408
+ file = Path(file)
409
+ string = safe_text_read(file)
410
+ if updated := inplace_update(string, ARTICLE_WRAPPER, self.to_typst_code()):
411
+ dump_text(file, updated)
412
+ logger.success(f"Successfully updated {file.as_posix()}.")
413
+ else:
414
+ logger.warning(f"Failed to update {file.as_posix()}. Please make sure there are paired `{ARTICLE_WRAPPER}`")
415
+ return self
416
+
417
+ @classmethod
418
+ def from_article_file(cls, file: str | Path, title: str) -> Self:
419
+ """Load article from file."""
420
+ file = Path(file)
421
+ string = safe_text_read(file)
422
+ return cls.from_typst_code(
423
+ title, ok(extract_body(string, ARTICLE_WRAPPER), "Failed to extract body from file.")
424
+ )
@@ -53,6 +53,7 @@ class Paragraph(SketchedAble, WordCount, Described):
53
53
 
54
54
  @property
55
55
  def exact_wordcount(self) -> int:
56
+ """Calculates the exact word count of the content."""
56
57
  return word_count(self.content)
57
58
 
58
59
 
@@ -1,8 +1,7 @@
1
1
  """A module containing the ArticleOutline class, which represents the outline of an academic paper."""
2
2
 
3
- from typing import Dict, Self
3
+ from typing import ClassVar, Dict, Type
4
4
 
5
- from fabricatio.fs.readers import extract_sections
6
5
  from fabricatio.models.extra.article_base import (
7
6
  ArticleBase,
8
7
  ChapterBase,
@@ -19,36 +18,14 @@ class ArticleSubsectionOutline(SubSectionBase):
19
18
 
20
19
  class ArticleSectionOutline(SectionBase[ArticleSubsectionOutline]):
21
20
  """A slightly more detailed research component specification for academic paper generation, Must contain subsections."""
22
- @classmethod
23
- def from_typst_code(cls, title: str, body: str, **kwargs) -> Self:
24
- """Parse the given Typst code into an ArticleSectionOutline instance."""
25
- return super().from_typst_code(
26
- title,
27
- body,
28
- subsections=[
29
- ArticleSubsectionOutline.from_typst_code(*pack)
30
- for pack in extract_sections(body, level=3, section_char="=")
31
- ],
32
- )
33
21
 
22
+ child_type: ClassVar[Type[SubSectionBase]] = ArticleSubsectionOutline
34
23
 
35
24
 
36
25
  class ArticleChapterOutline(ChapterBase[ArticleSectionOutline]):
37
26
  """Macro-structural unit implementing standard academic paper organization. Must contain sections."""
38
27
 
39
- @classmethod
40
- def from_typst_code(cls, title: str, body: str, **kwargs) -> Self:
41
- """Parse the given Typst code into an ArticleChapterOutline instance."""
42
- return super().from_typst_code(
43
- title,
44
- body,
45
- sections=[
46
- ArticleSectionOutline.from_typst_code(*pack)
47
- for pack in extract_sections(body, level=2, section_char="=")
48
- ],
49
-
50
- )
51
-
28
+ child_type: ClassVar[Type[SectionBase]] = ArticleSectionOutline
52
29
 
53
30
 
54
31
  class ArticleOutline(
@@ -58,21 +35,11 @@ class ArticleOutline(
58
35
  ):
59
36
  """Outline of an academic paper, containing chapters, sections, subsections."""
60
37
 
38
+ child_type: ClassVar[Type[ChapterBase]] = ArticleChapterOutline
39
+
61
40
  def _as_prompt_inner(self) -> Dict[str, str]:
62
41
  return {
63
42
  "Original Article Briefing": self.referenced.referenced,
64
43
  "Original Article Proposal": self.referenced.display(),
65
44
  "Original Article Outline": self.display(),
66
45
  }
67
-
68
- @classmethod
69
- def from_typst_code(cls, title: str, body: str, **kwargs) -> Self:
70
- """Parse the given Typst code into an ArticleOutline instance."""
71
- return super().from_typst_code(
72
- title,
73
- body,
74
- chapters=[
75
- ArticleChapterOutline.from_typst_code(*pack)
76
- for pack in extract_sections(body, level=1, section_char="=")
77
- ],
78
- )
fabricatio/rust.pyi CHANGED
@@ -16,6 +16,7 @@ from typing import Any, Dict, List, Optional, Tuple, overload
16
16
 
17
17
  from pydantic import JsonValue
18
18
 
19
+
19
20
  class TemplateManager:
20
21
  """Template rendering engine using Handlebars templates.
21
22
 
@@ -26,7 +27,7 @@ class TemplateManager:
26
27
  """
27
28
 
28
29
  def __init__(
29
- self, template_dirs: List[Path], suffix: Optional[str] = None, active_loading: Optional[bool] = None
30
+ self, template_dirs: List[Path], suffix: Optional[str] = None, active_loading: Optional[bool] = None
30
31
  ) -> None:
31
32
  """Initialize the template manager.
32
33
 
@@ -58,8 +59,10 @@ class TemplateManager:
58
59
 
59
60
  @overload
60
61
  def render_template(self, name: str, data: Dict[str, Any]) -> str: ...
62
+
61
63
  @overload
62
64
  def render_template(self, name: str, data: List[Dict[str, Any]]) -> List[str]: ...
65
+
63
66
  def render_template(self, name: str, data: Dict[str, Any] | List[Dict[str, Any]]) -> str | List[str]:
64
67
  """Render a template with context data.
65
68
 
@@ -76,8 +79,10 @@ class TemplateManager:
76
79
 
77
80
  @overload
78
81
  def render_template_raw(self, template: str, data: Dict[str, Any]) -> str: ...
82
+
79
83
  @overload
80
84
  def render_template_raw(self, template: str, data: List[Dict[str, Any]]) -> List[str]: ...
85
+
81
86
  def render_template_raw(self, template: str, data: Dict[str, Any] | List[Dict[str, Any]]) -> str | List[str]:
82
87
  """Render a template with context data.
83
88
 
@@ -89,6 +94,7 @@ class TemplateManager:
89
94
  Rendered template content as string or list of strings
90
95
  """
91
96
 
97
+
92
98
  class BibManager:
93
99
  """BibTeX bibliography manager for parsing and querying citation data."""
94
100
 
@@ -197,6 +203,7 @@ class BibManager:
197
203
  Field value if found, None otherwise
198
204
  """
199
205
 
206
+
200
207
  def blake3_hash(content: bytes) -> str:
201
208
  """Calculate the BLAKE3 cryptographic hash of data.
202
209
 
@@ -207,9 +214,11 @@ def blake3_hash(content: bytes) -> str:
207
214
  Hex-encoded BLAKE3 hash string
208
215
  """
209
216
 
217
+
210
218
  def detect_language(string: str) -> str:
211
219
  """Detect the language of a given string."""
212
220
 
221
+
213
222
  def split_word_bounds(string: str) -> List[str]:
214
223
  """Split the string into words based on word boundaries.
215
224
 
@@ -220,6 +229,7 @@ def split_word_bounds(string: str) -> List[str]:
220
229
  A list of words extracted from the string.
221
230
  """
222
231
 
232
+
223
233
  def split_sentence_bounds(string: str) -> List[str]:
224
234
  """Split the string into sentences based on sentence boundaries.
225
235
 
@@ -230,6 +240,7 @@ def split_sentence_bounds(string: str) -> List[str]:
230
240
  A list of sentences extracted from the string.
231
241
  """
232
242
 
243
+
233
244
  def split_into_chunks(string: str, max_chunk_size: int, max_overlapping_rate: float = 0.3) -> List[str]:
234
245
  """Split the string into chunks of a specified size.
235
246
 
@@ -242,6 +253,7 @@ def split_into_chunks(string: str, max_chunk_size: int, max_overlapping_rate: fl
242
253
  A list of chunks extracted from the string.
243
254
  """
244
255
 
256
+
245
257
  def word_count(string: str) -> int:
246
258
  """Count the number of words in the string.
247
259
 
@@ -252,51 +264,67 @@ def word_count(string: str) -> int:
252
264
  The number of words in the string.
253
265
  """
254
266
 
267
+
255
268
  def is_chinese(string: str) -> bool:
256
269
  """Check if the given string is in Chinese."""
257
270
 
271
+
258
272
  def is_english(string: str) -> bool:
259
273
  """Check if the given string is in English."""
260
274
 
275
+
261
276
  def is_japanese(string: str) -> bool:
262
277
  """Check if the given string is in Japanese."""
263
278
 
279
+
264
280
  def is_korean(string: str) -> bool:
265
281
  """Check if the given string is in Korean."""
266
282
 
283
+
267
284
  def is_arabic(string: str) -> bool:
268
285
  """Check if the given string is in Arabic."""
269
286
 
287
+
270
288
  def is_russian(string: str) -> bool:
271
289
  """Check if the given string is in Russian."""
272
290
 
291
+
273
292
  def is_german(string: str) -> bool:
274
293
  """Check if the given string is in German."""
275
294
 
295
+
276
296
  def is_french(string: str) -> bool:
277
297
  """Check if the given string is in French."""
278
298
 
299
+
279
300
  def is_hindi(string: str) -> bool:
280
301
  """Check if the given string is in Hindi."""
281
302
 
303
+
282
304
  def is_italian(string: str) -> bool:
283
305
  """Check if the given string is in Italian."""
284
306
 
307
+
285
308
  def is_dutch(string: str) -> bool:
286
309
  """Check if the given string is in Dutch."""
287
310
 
311
+
288
312
  def is_portuguese(string: str) -> bool:
289
313
  """Check if the given string is in Portuguese."""
290
314
 
315
+
291
316
  def is_swedish(string: str) -> bool:
292
317
  """Check if the given string is in Swedish."""
293
318
 
319
+
294
320
  def is_turkish(string: str) -> bool:
295
321
  """Check if the given string is in Turkish."""
296
322
 
323
+
297
324
  def is_vietnamese(string: str) -> bool:
298
325
  """Check if the given string is in Vietnamese."""
299
326
 
327
+
300
328
  def tex_to_typst(string: str) -> str:
301
329
  """Convert TeX to Typst.
302
330
 
@@ -307,6 +335,7 @@ def tex_to_typst(string: str) -> str:
307
335
  The converted Typst string.
308
336
  """
309
337
 
338
+
310
339
  def convert_all_inline_tex(string: str) -> str:
311
340
  """Convert all inline TeX code in the string.
312
341
 
@@ -317,6 +346,7 @@ def convert_all_inline_tex(string: str) -> str:
317
346
  The converted string with inline TeX code replaced.
318
347
  """
319
348
 
349
+
320
350
  def convert_all_block_tex(string: str) -> str:
321
351
  """Convert all block TeX code in the string.
322
352
 
@@ -327,6 +357,7 @@ def convert_all_block_tex(string: str) -> str:
327
357
  The converted string with block TeX code replaced.
328
358
  """
329
359
 
360
+
330
361
  def fix_misplaced_labels(string: str) -> str:
331
362
  """A func to fix labels in a string.
332
363
 
@@ -337,6 +368,7 @@ def fix_misplaced_labels(string: str) -> str:
337
368
  The fixed string with labels properly placed.
338
369
  """
339
370
 
371
+
340
372
  def comment(string: str) -> str:
341
373
  """Add comment to the string.
342
374
 
@@ -347,6 +379,7 @@ def comment(string: str) -> str:
347
379
  The string with each line prefixed by '// '.
348
380
  """
349
381
 
382
+
350
383
  def uncomment(string: str) -> str:
351
384
  """Remove comment from the string.
352
385
 
@@ -357,6 +390,7 @@ def uncomment(string: str) -> str:
357
390
  The string with comments (lines starting with '// ' or '//') removed.
358
391
  """
359
392
 
393
+
360
394
  def split_out_metadata(string: str) -> Tuple[Optional[JsonValue], str]:
361
395
  """Split out metadata from a string.
362
396
 
@@ -367,6 +401,7 @@ def split_out_metadata(string: str) -> Tuple[Optional[JsonValue], str]:
367
401
  A tuple containing the metadata as a Python object (if parseable) and the remaining string.
368
402
  """
369
403
 
404
+
370
405
  def to_metadata(data: JsonValue) -> str:
371
406
  """Convert a Python object to a YAML string.
372
407
 
@@ -377,8 +412,37 @@ def to_metadata(data: JsonValue) -> str:
377
412
  The YAML string representation of the input data.
378
413
  """
379
414
 
415
+
380
416
  def convert_to_inline_formula(string: str) -> str:
381
417
  r"""Convert `$...$` to inline formula `\(...\)` and trim spaces."""
382
418
 
419
+
383
420
  def convert_to_block_formula(string: str) -> str:
384
421
  r"""Convert `$$...$$` to block formula `\[...\]` and trim spaces."""
422
+
423
+
424
+ def inplace_update(string: str, wrapper: str, new_body: str) -> Optional[str]:
425
+ """Replace content between wrapper strings.
426
+
427
+ Args:
428
+ string: The input string containing content wrapped by delimiter strings.
429
+ wrapper: The delimiter string that marks the beginning and end of the content to replace.
430
+ new_body: The new content to place between the wrapper strings.
431
+
432
+ Returns:
433
+ A new string with the content between wrappers replaced.
434
+
435
+ """
436
+
437
+
438
+ def extract_body(string: str, wrapper: str) -> Optional[str]:
439
+ """
440
+ Extract the content between two occurrences of a wrapper string.
441
+
442
+ Args:
443
+ string: The input string containing content wrapped by delimiter strings.
444
+ wrapper: The delimiter string that marks the beginning and end of the content to extract.
445
+
446
+ Returns:
447
+ The content between the first two occurrences of the wrapper string if found, otherwise None.
448
+ """
fabricatio/utils.py CHANGED
@@ -182,7 +182,7 @@ class RerankerAPI:
182
182
  response = requests.post(f"{self.base_url}/rerank", json=payload)
183
183
 
184
184
  # Handle non-200 status codes
185
- if response.ok:
185
+ if not response.ok:
186
186
  error_data = None
187
187
  if "application/json" in response.headers.get("Content-Type", ""):
188
188
  error_data = response.json()
@@ -239,7 +239,7 @@ class RerankerAPI:
239
239
  session.post(f"{self.base_url}/rerank", json=payload) as response,
240
240
  ):
241
241
  # Handle non-200 status codes
242
- if response.ok:
242
+ if not response.ok:
243
243
  if "application/json" in response.headers.get("Content-Type", ""):
244
244
  error_data = await response.json()
245
245
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fabricatio
3
- Version: 0.2.13.dev1
3
+ Version: 0.2.13.dev3
4
4
  Classifier: License :: OSI Approved :: MIT License
5
5
  Classifier: Programming Language :: Rust
6
6
  Classifier: Programming Language :: Python :: 3.12
@@ -1,36 +1,36 @@
1
- fabricatio-0.2.13.dev1.dist-info/METADATA,sha256=ho55CHS_a1lyoaMS_0IPkZhEnTE85U-DYhEdY4zhxyg,5169
2
- fabricatio-0.2.13.dev1.dist-info/WHEEL,sha256=7FgAcpQES0h1xhfN9Ugve9FTUilU6sRAr1WJ5ph2cuw,108
3
- fabricatio-0.2.13.dev1.dist-info/licenses/LICENSE,sha256=yDZaTLnOi03bi3Dk6f5IjhLUc5old2yOsihHWU0z-i0,1067
1
+ fabricatio-0.2.13.dev3.dist-info/METADATA,sha256=Dns3zHDS0D06YJFNSLlN-J4QhN9RujbuX_n-7IBw0Cc,5169
2
+ fabricatio-0.2.13.dev3.dist-info/WHEEL,sha256=7FgAcpQES0h1xhfN9Ugve9FTUilU6sRAr1WJ5ph2cuw,108
3
+ fabricatio-0.2.13.dev3.dist-info/licenses/LICENSE,sha256=yDZaTLnOi03bi3Dk6f5IjhLUc5old2yOsihHWU0z-i0,1067
4
4
  fabricatio/capabilities/check.py,sha256=TLtkUIR6tX73qR_V5TkXpdmplrmqFt4dZj32PBy81H0,8409
5
5
  fabricatio/capabilities/propose.py,sha256=vOJvmmnMBHUQB6N1AmZNFw42jf7Bl2mBRNlBK15TpNI,1942
6
6
  fabricatio/capabilities/correct.py,sha256=Et3Ud-oLZlwTVSy2XyT5UX2shT_OJ9j4HWP9b5Hntvk,10192
7
7
  fabricatio/capabilities/rating.py,sha256=nolk5iBSiOzsOqqKIh1c4YSdRLwcllo9vBHuwp1dV74,17432
8
8
  fabricatio/capabilities/censor.py,sha256=j6vyjKpR1CfLzC-XrOZSZePjJz3jsoM104gqqsWwi1Q,4615
9
- fabricatio/capabilities/advanced_rag.py,sha256=y1XMENFdGGr0AcXZHgloRM9jX2yJpPEM-q0Y9Z-EI1k,2320
9
+ fabricatio/capabilities/advanced_rag.py,sha256=ZCrzKMvL4PldEuXyQh_prhJifS98RWsxM43exfGq1w8,2453
10
10
  fabricatio/capabilities/task.py,sha256=_BAQonNy5JH3JxhLmPGfn0nDvn_ENKXyOdql8EVXRLE,4362
11
- fabricatio/capabilities/rag.py,sha256=8unTYyyzeRifLrMAYlN_SrICfwkvoWHZPJcTljWTXXc,10485
11
+ fabricatio/capabilities/rag.py,sha256=86ooIq4Oy2lYbFRBXH7q76OLF7lVSWXu5Apyj1Bx07E,10794
12
12
  fabricatio/capabilities/extract.py,sha256=b4_Tuc9O6Pe71y4Tj-JHMb4simdhduVR-rcfD9yW8RA,2425
13
13
  fabricatio/capabilities/advanced_judge.py,sha256=bvb8fYoiKoGlBwMZVMflVE9R2MoS1VtmZAo65jMJFew,683
14
14
  fabricatio/capabilities/review.py,sha256=EPL8IlxSKO0XStBkXdW7FJMbPztDQMv9w7tHgu6r3PM,4948
15
15
  fabricatio/capabilities/__init__.py,sha256=skaJ43CqAQaZMH-mCRzF4Fps3x99P2SwJ8vSM9pInX8,56
16
16
  fabricatio/parser.py,sha256=rMXd9Lo5TjxUkI0rocYigF9d1kC0rSySenuMW8uqXm8,6483
17
17
  fabricatio/models/action.py,sha256=_8iwX7BJWUOUKzM0Zn6B7jSrjRPJgzr88vTiYVHxRgE,10330
18
- fabricatio/models/extra/article_outline.py,sha256=B_qMldX_vxPZ52uvCp124R4vVYFFYPjUjLJc0-_lGog,2715
18
+ fabricatio/models/extra/article_outline.py,sha256=K3Ajb86JQSsjo61briVCkIJkqRwvJ46uNU94NCrW-cY,1584
19
19
  fabricatio/models/extra/article_essence.py,sha256=zUfZ2_bX3h__RaVPwJlxQ-tkFyfSV8SdX8DsmFX6v_w,2649
20
- fabricatio/models/extra/article_main.py,sha256=Ppgzqj28ATECkvevQUxRNBy56GMqhAc376R1HE1Kobo,10866
20
+ fabricatio/models/extra/article_main.py,sha256=RT4GRywxbR_ExOpYufe8j320EWT4nbnd7ASfcWIK-l0,10928
21
21
  fabricatio/models/extra/article_proposal.py,sha256=4G2qLkMxtK54G1ANgPW0G3w4Pahxgk2lhGPU5KMxuzw,1818
22
- fabricatio/models/extra/article_base.py,sha256=Kyfuvv9S6jTQafCdq1_DhzsimsK72iViuB1GqrihQQY,15298
22
+ fabricatio/models/extra/article_base.py,sha256=KSAAtShI1sSgpc8JxKxz4eLDBhquGABedsZUuwHX0_4,16338
23
23
  fabricatio/models/extra/rag.py,sha256=RWv_YJhDX6UL4t3sRtQt-LYMtxN-K-t931nmyiJXkKM,3857
24
24
  fabricatio/models/extra/rule.py,sha256=b756_XmWeDoJ1qOFEGy6ZfP8O7rBjOZs4XvfZvWKXXI,2574
25
25
  fabricatio/models/extra/problem.py,sha256=1Sd8hsThQK6pXMXhErRhP1ft58z4PvqeB8AV8VcXiaI,7051
26
26
  fabricatio/models/extra/patches.py,sha256=_ghmnlvTZQq7UJyaH77mTZE9abjvxRJ2mgWHUbezUls,977
27
27
  fabricatio/models/extra/advanced_judge.py,sha256=CKPP4Lseb_Ey8Y7i2V9HJfB-mZgCknFdqq7Zo41o6s4,1060
28
- fabricatio/models/extra/aricle_rag.py,sha256=-w1fxs5PrsLTYPmNtUhWSeucQ9evnasUB75aMlzutL0,10722
28
+ fabricatio/models/extra/aricle_rag.py,sha256=RkA1Q0Mh81c4bUn_b8EAmogvsna_doI7j3yah64fmz8,11405
29
29
  fabricatio/models/extra/__init__.py,sha256=0R9eZsCNu6OV-Xtf15H7FrqhfHTFBFf3fBrcd7ChsJ0,53
30
30
  fabricatio/models/usages.py,sha256=FVRhh_AulXlJF9uUmJzKEdiLz-di0rAiaQm4snYEid0,32571
31
31
  fabricatio/models/events.py,sha256=-9Xy8kcZug1tYwxmt3GpXtCkNfZUMSFvAH5HdZoRJTI,4030
32
32
  fabricatio/models/generic.py,sha256=oUsYgAx2LmA35ePlavGvT-UjUqbL-a-4-5GuPPUAtvo,30442
33
- fabricatio/models/adv_kwargs_types.py,sha256=iHYV4uB5YQPdywxg2vACLFbqHCLJCDek26aMb3ByWkY,1996
33
+ fabricatio/models/adv_kwargs_types.py,sha256=nmj1D0GVosZxKcdiw-B5vJB04Whr5zh30ZBJntSZUpY,2034
34
34
  fabricatio/models/role.py,sha256=5SJ1Vm6H3FwOVEk5Z-4GBJWABI3OKAKwkz5t170osi8,2855
35
35
  fabricatio/models/task.py,sha256=O4v5T3HuzYblGeeqNzTDOCbulhGovR6olV2ojD0FJvk,10785
36
36
  fabricatio/models/kwargs_types.py,sha256=ts2P7dWAiy3knWvLVzJMVIl5TNrODbaoA7YhIP6CuD8,4826
@@ -39,12 +39,12 @@ fabricatio/config.py,sha256=WL7lGN_XD98NHXuPYi95HWUY-xnGxXJxxqSFk0xVPRA,17696
39
39
  fabricatio/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  fabricatio/core.py,sha256=MaEKZ6DDmbdScAY-7F1gwGA6fr7ADX6Mz5rNVi2msFA,6277
41
41
  fabricatio/constants.py,sha256=JxtaKGTf0IQhM-MNCHtr6x85Ejg8FWYcie-Z_RupCBg,557
42
- fabricatio/rust.pyi,sha256=9GXuLBMTmRfea9PUBZgycYACly02kCaGzDb1YGgg0g8,11207
42
+ fabricatio/rust.pyi,sha256=14jXpVNtxBGq5Yez6SNQG5mGWkjayAnOSGiF76xdQ2k,12192
43
43
  fabricatio/actions/article.py,sha256=7N2TJARtN7iBWagmrtTI7Zqcp7U_8yxzKP6eB0t4PiM,12241
44
44
  fabricatio/actions/rules.py,sha256=07ILsiwR250AUcKLPHTUPpWD_mPhPCfWKSkEAKcPv3A,3557
45
45
  fabricatio/actions/output.py,sha256=lTvMgXzY-fwA_kNrivdFZkk3kT8DMpjBSIWLyav2B1k,8089
46
46
  fabricatio/actions/rag.py,sha256=-bA7KkZEFfWEanAPHzYwRHG7zRlTZcNDI7HL3n-lDuE,3496
47
- fabricatio/actions/article_rag.py,sha256=ri6EL2V8CHY6geheI4URfTUUFbWBN5Tq0GytMQe5wtk,18665
47
+ fabricatio/actions/article_rag.py,sha256=6y-GnaBQ36I2y2LM3MnU9JFppFFNXo3IsOBbbfAH6rs,18009
48
48
  fabricatio/actions/fs.py,sha256=nlTmk-tYDW158nz_fzlsNfuYJwj7j4BHn_MFY5hxdqs,934
49
49
  fabricatio/actions/__init__.py,sha256=ZMa1LeM5BNeqp-J-D32W-f5bD53-kdXGyt0zuueJofM,47
50
50
  fabricatio/fs/curd.py,sha256=x7Je9V1ydv-BdZTjlLc3syZ6380gkOhpfrfnhXstisg,4624
@@ -58,10 +58,10 @@ fabricatio/toolboxes/arithmetic.py,sha256=sSTPkKI6-mb278DwQKFO9jKyzc9kCx45xNH7V6
58
58
  fabricatio/toolboxes/fs.py,sha256=OQMdeokYxSNVrCZJAweJ0cYiK4k2QuEiNdIbS5IHIV8,705
59
59
  fabricatio/toolboxes/__init__.py,sha256=dYm_Gd8XolSU_h4wnkA09dlaLDK146eeFz0CUgPZ8_c,380
60
60
  fabricatio/rust_instances.py,sha256=i5fIt6XkE8UwUU4JarmPt50AZs8aJW6efaypSLGLl0I,303
61
- fabricatio/utils.py,sha256=QsS9kIly_4uEzzPCid1V8ZDfkKtSxWzoLVpQXjCXxD8,10036
61
+ fabricatio/utils.py,sha256=ocQMezvAiLxV7FyHrPi-H4Wp5xKJymuyntPGpHyOHCc,10044
62
62
  fabricatio/journal.py,sha256=Op0wC-JlZumnAc_aDmYM4ljnSNLoKEEMfcIRbCF69ow,455
63
63
  fabricatio/__init__.py,sha256=OXoMMHJKHEB_vN97_34U4I5QpAKL9xnVQEVcBCvwBCg,986
64
- fabricatio/rust.cpython-312-x86_64-linux-gnu.so,sha256=Z4iO633RTLw3qF0bzJG7EneVa3c2zhWdYi8WzRLVb10,4735000
65
- fabricatio-0.2.13.dev1.data/scripts/tdown,sha256=aY-0gG_0xs7kJuvYyHT1ol57cIgam2tXhoRSCsLOKbE,4587880
66
- fabricatio-0.2.13.dev1.data/scripts/ttm,sha256=Q3E4jfmKWP4hIOU8JZ54I1wbSlPg3m72qCFxlr3pUNc,3924408
67
- fabricatio-0.2.13.dev1.dist-info/RECORD,,
64
+ fabricatio/rust.cpython-312-x86_64-linux-gnu.so,sha256=70ZYQcZMvFooMDJQe-ty67zN8ohGRmOt4mi8jQ4dcYU,4748624
65
+ fabricatio-0.2.13.dev3.data/scripts/tdown,sha256=4Liv9ixP-750aUzsZzKrvF76ZyJRG3Ay8mKPFdUe-6k,4592872
66
+ fabricatio-0.2.13.dev3.data/scripts/ttm,sha256=GA53bWxU6I-xNgtBMtulVwlCN5rcKLSrq0ddEK2N2Nc,3921584
67
+ fabricatio-0.2.13.dev3.dist-info/RECORD,,
Binary file