fabricatio 0.2.10.dev0__cp312-cp312-win_amd64.whl → 0.2.10.dev1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ from asyncio import gather
4
4
  from pathlib import Path
5
5
  from typing import Callable, List, Optional
6
6
 
7
+ from fabricatio.rust import BibManager, detect_language
7
8
  from more_itertools import filter_map
8
9
 
9
10
  from fabricatio.capabilities.censor import Censor
@@ -17,7 +18,6 @@ from fabricatio.models.extra.article_outline import ArticleOutline
17
18
  from fabricatio.models.extra.article_proposal import ArticleProposal
18
19
  from fabricatio.models.extra.rule import RuleSet
19
20
  from fabricatio.models.task import Task
20
- from fabricatio.rust import BibManager, detect_language
21
21
  from fabricatio.utils import ok
22
22
 
23
23
 
@@ -78,7 +78,7 @@ class FixArticleEssence(Action):
78
78
  out = []
79
79
  count = 0
80
80
  for a in article_essence:
81
- if key := (bib_mgr.get_cite_key(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
81
+ if key := (bib_mgr.get_cite_key_by_title(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
82
82
  a.title = bib_mgr.get_title_by_key(key) or a.title
83
83
  a.authors = bib_mgr.get_author_by_key(key) or a.authors
84
84
  a.publication_year = bib_mgr.get_year_by_key(key) or a.publication_year
@@ -1,11 +1,15 @@
1
1
  """A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
2
2
 
3
3
  from asyncio import gather
4
- from typing import Optional
4
+ from pathlib import Path
5
+ from typing import List, Optional
5
6
 
7
+ from fabricatio import BibManager
6
8
  from fabricatio.capabilities.censor import Censor
7
9
  from fabricatio.capabilities.rag import RAG
8
10
  from fabricatio.models.action import Action
11
+ from fabricatio.models.extra.aricle_rag import ArticleChunk
12
+ from fabricatio.models.extra.article_essence import ArticleEssence
9
13
  from fabricatio.models.extra.article_main import Article, ArticleSubsection
10
14
  from fabricatio.models.extra.rule import RuleSet
11
15
  from fabricatio.utils import ok
@@ -97,9 +101,36 @@ class TweakArticleRAG(Action, RAG, Censor):
97
101
  await self.censor_obj_inplace(
98
102
  subsec,
99
103
  ruleset=ruleset,
100
- reference=f"{await self.aretrieve_compact(refind_q, final_limit=self.ref_limit)}\n\n"
104
+ reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, final_limit=self.ref_limit))}\n\n"
101
105
  f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
102
106
  f"You should Always use `{subsec.language}` as written language, "
103
107
  f"which is the original language of the `{subsec.title}`. "
104
108
  f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
105
109
  )
110
+
111
+
112
+ class ChunkArticle(Action):
113
+ """Chunk an article into smaller chunks."""
114
+
115
+ output_key:str = "article_chunks"
116
+ """The key used to store the output of the action."""
117
+ max_chunk_size: Optional[int] = None
118
+ """The maximum size of each chunk."""
119
+ max_overlapping_rate: Optional[float] = None
120
+ """The maximum overlapping rate between chunks."""
121
+
122
+ async def _execute(
123
+ self,
124
+ article_path: str | Path,
125
+ bib_manager: BibManager,
126
+ max_chunk_size: Optional[int] = None,
127
+ max_overlapping_rate: Optional[float] = None,
128
+ **_,
129
+ ) -> List[ArticleChunk]:
130
+ return ArticleChunk.from_file(
131
+ article_path,
132
+ bib_manager,
133
+ max_chunk_size=ok(max_chunk_size or self.max_chunk_size, "No max_chunk_size provided!"),
134
+ max_overlapping_rate=ok(max_overlapping_rate or self.max_overlapping_rate, "No max_overlapping_rate provided!"),
135
+ )
136
+
fabricatio/actions/rag.py CHANGED
@@ -5,34 +5,56 @@ from typing import List, Optional
5
5
  from questionary import text
6
6
 
7
7
  from fabricatio.capabilities.rag import RAG
8
+ from fabricatio.config import configs
8
9
  from fabricatio.journal import logger
9
10
  from fabricatio.models.action import Action
10
- from fabricatio.models.generic import Vectorizable
11
+ from fabricatio.models.extra.rag import MilvusClassicModel, MilvusDataBase
11
12
  from fabricatio.models.task import Task
13
+ from fabricatio.utils import ok
12
14
 
13
15
 
14
16
  class InjectToDB(Action, RAG):
15
17
  """Inject data into the database."""
16
18
 
17
19
  output_key: str = "collection_name"
20
+ collection_name: str = "my_collection"
21
+ """The name of the collection to inject data into."""
18
22
 
19
- async def _execute[T: Vectorizable](
20
- self, to_inject: Optional[T] | List[Optional[T]], collection_name: str = "my_collection",override_inject:bool=False, **_
23
+ async def _execute[T: MilvusDataBase](
24
+ self, to_inject: Optional[T] | List[Optional[T]], override_inject: bool = False, **_
21
25
  ) -> Optional[str]:
26
+ from pymilvus.milvus_client import IndexParams
27
+
28
+ if to_inject is None:
29
+ return None
22
30
  if not isinstance(to_inject, list):
23
31
  to_inject = [to_inject]
24
- logger.info(f"Injecting {len(to_inject)} items into the collection '{collection_name}'")
32
+ if not (seq := [t for t in to_inject if t is not None]): # filter out None
33
+ return None
34
+ logger.info(f"Injecting {len(seq)} items into the collection '{self.collection_name}'")
25
35
  if override_inject:
26
- self.check_client().client.drop_collection(collection_name)
27
- await self.view(collection_name, create=True).consume_string(
28
- [
29
- t.prepare_vectorization(self.embedding_max_sequence_length)
30
- for t in to_inject
31
- if isinstance(t, Vectorizable)
32
- ],
33
- )
34
-
35
- return collection_name
36
+ self.check_client().client.drop_collection(self.collection_name)
37
+
38
+ await self.view(
39
+ self.collection_name,
40
+ create=True,
41
+ schema=seq[0].as_milvus_schema(
42
+ ok(
43
+ self.milvus_dimensions
44
+ or configs.rag.milvus_dimensions
45
+ or self.embedding_dimensions
46
+ or configs.embedding.dimensions
47
+ ),
48
+ ),
49
+ index_params=IndexParams(
50
+ seq[0].vector_field_name,
51
+ index_name=seq[0].vector_field_name,
52
+ index_type=seq[0].index_type,
53
+ metric_type=seq[0].metric_type,
54
+ ),
55
+ ).add_document(seq, flush=True)
56
+
57
+ return self.collection_name
36
58
 
37
59
 
38
60
  class RAGTalk(Action, RAG):
@@ -62,10 +84,10 @@ class RAGTalk(Action, RAG):
62
84
  user_say = await text("User: ").ask_async()
63
85
  if user_say is None:
64
86
  break
65
- gpt_say = await self.aask_retrieved(
66
- user_say,
67
- user_say,
68
- extra_system_message=f"You have to answer to user obeying task assigned to you:\n{task_input.briefing}",
87
+ ret: List[MilvusClassicModel] = await self.aretrieve(user_say, document_model=MilvusClassicModel)
88
+
89
+ gpt_say = await self.aask(
90
+ user_say, system_message="\n".join(m.text for m in ret) + "\nYou can refer facts provided above."
69
91
  )
70
92
  print(f"GPT: {gpt_say}") # noqa: T201
71
93
  counter += 1
@@ -130,7 +130,7 @@ class RAG(EmbeddingUsage):
130
130
  if isinstance(data, MilvusDataBase):
131
131
  data = [data]
132
132
 
133
- data_vec = await self.vectorize([d.to_vectorize for d in data])
133
+ data_vec = await self.vectorize([d.prepare_vectorization() for d in data])
134
134
  prepared_data = [d.prepare_insertion(vec) for d, vec in zip(data, data_vec, strict=True)]
135
135
 
136
136
  c_name = collection_name or self.safe_target_collection
@@ -188,13 +188,15 @@ class RAG(EmbeddingUsage):
188
188
  async def aretrieve[D: MilvusDataBase](
189
189
  self,
190
190
  query: List[str] | str,
191
+ document_model: Type[D],
191
192
  final_limit: int = 20,
192
- **kwargs: Unpack[FetchKwargs[D]],
193
+ **kwargs: Unpack[FetchKwargs],
193
194
  ) -> List[D]:
194
195
  """Retrieve data from the collection.
195
196
 
196
197
  Args:
197
198
  query (List[str] | str): The query to be used for retrieval.
199
+ document_model (Type[D]): The model class used to convert retrieved data into document objects.
198
200
  final_limit (int): The final limit on the number of results to return.
199
201
  **kwargs (Unpack[FetchKwargs]): Additional keyword arguments for retrieval.
200
202
 
@@ -206,6 +208,7 @@ class RAG(EmbeddingUsage):
206
208
  return (
207
209
  await self.afetch_document(
208
210
  vecs=(await self.vectorize(query)),
211
+ document_model=document_model,
209
212
  **kwargs,
210
213
  )
211
214
  )[:final_limit]
@@ -1,10 +1,9 @@
1
1
  """A module containing kwargs types for content correction and checking operations."""
2
2
 
3
3
  from importlib.util import find_spec
4
- from typing import Required, Type, TypedDict
4
+ from typing import NotRequired, TypedDict
5
5
 
6
6
  from fabricatio.models.extra.problem import Improvement
7
- from fabricatio.models.extra.rag import MilvusDataBase
8
7
  from fabricatio.models.extra.rule import RuleSet
9
8
  from fabricatio.models.generic import SketchedAble
10
9
  from fabricatio.models.kwargs_types import ReferencedKwargs
@@ -49,19 +48,13 @@ if find_spec("pymilvus"):
49
48
  schema: CollectionSchema | None
50
49
  index_params: IndexParams | None
51
50
 
52
- class FetchKwargs[D: MilvusDataBase](TypedDict, total=False):
51
+ class FetchKwargs(TypedDict):
53
52
  """Arguments for fetching data from vector collections.
54
53
 
55
54
  Controls how data is retrieved from vector databases, including filtering
56
55
  and result limiting parameters.
57
56
  """
58
57
 
59
- document_model: Required[Type[D]]
60
- collection_name: str | None
61
- similarity_threshold: float
62
- result_per_query: int
63
-
64
- class RetrievalKwargs(FetchKwargs, total=False):
65
- """Arguments for retrieval operations."""
66
-
67
- final_limit: int
58
+ collection_name: NotRequired[str | None]
59
+ similarity_threshold: NotRequired[float]
60
+ result_per_query: NotRequired[int]
@@ -0,0 +1,120 @@
1
+ """A Module containing the article rag models."""
2
+
3
+ from pathlib import Path
4
+ from typing import ClassVar, Dict, List, Self, Unpack
5
+
6
+ from fabricatio.fs import safe_text_read
7
+ from fabricatio.journal import logger
8
+ from fabricatio.models.extra.rag import MilvusDataBase
9
+ from fabricatio.models.generic import AsPrompt
10
+ from fabricatio.models.kwargs_types import ChunkKwargs
11
+ from fabricatio.rust import BibManager, split_into_chunks
12
+ from fabricatio.utils import ok, wrapp_in_block
13
+ from more_itertools.recipes import flatten
14
+ from pydantic import Field
15
+
16
+
17
+ class ArticleChunk(MilvusDataBase, AsPrompt):
18
+ """The chunk of an article."""
19
+
20
+ head_split: ClassVar[List[str]] = [
21
+ "引 言",
22
+ "引言",
23
+ "绪 论",
24
+ "绪论",
25
+ "前言",
26
+ "INTRODUCTION",
27
+ "Introduction",
28
+ ]
29
+ tail_split: ClassVar[List[str]] = [
30
+ "参 考 文 献",
31
+ "参 考 文 献",
32
+ "参考文献",
33
+ "REFERENCES",
34
+ "References",
35
+ "Bibliography",
36
+ "Reference",
37
+ ]
38
+ chunk: str
39
+ """The segment of the article"""
40
+ year: int
41
+ """The year of the article"""
42
+ authors: List[str] = Field(default_factory=list)
43
+ """The authors of the article"""
44
+ article_title: str
45
+ """The title of the article"""
46
+ bibtex_cite_key: str
47
+ """The bibtex cite key of the article"""
48
+
49
+ def _as_prompt_inner(self) -> Dict[str, str]:
50
+ return {
51
+ self.article_title: f"{wrapp_in_block(self.chunk, 'Referring Content')}\n"
52
+ f"Authors: {';'.join(self.authors)}\n"
53
+ f"Published Year: {self.year}\n"
54
+ f"Bibtex Key: {self.bibtex_cite_key}\n",
55
+ }
56
+
57
+ def _prepare_vectorization_inner(self) -> str:
58
+ return self.chunk
59
+
60
+ @classmethod
61
+ def from_file[P: str | Path](
62
+ cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
63
+ ) -> List[Self]:
64
+ """Load the article chunks from the file."""
65
+ if isinstance(path, list):
66
+ result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
67
+ logger.debug(f"Number of chunks created from list of files: {len(result)}")
68
+ return result
69
+
70
+ return cls._from_file_inner(path, bib_mgr, **kwargs)
71
+
72
+ @classmethod
73
+ def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
74
+ path = Path(path)
75
+
76
+ title_seg = path.stem.split(" - ").pop()
77
+
78
+ key = (
79
+ bib_mgr.get_cite_key_by_title(title_seg)
80
+ or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
81
+ or bib_mgr.get_cite_key_fuzzy(path.stem)
82
+ )
83
+ if key is None:
84
+ logger.warning(f"no cite key found for {path.as_posix()}, skip.")
85
+ return []
86
+ authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
87
+ year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
88
+ article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
89
+
90
+ result = [
91
+ cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
92
+ for c in split_into_chunks(cls.strip(safe_text_read(path)), **kwargs)
93
+ ]
94
+ logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
95
+ return result
96
+
97
+ @classmethod
98
+ def strip(cls, string: str) -> str:
99
+ """Strip the head and tail of the string."""
100
+ logger.debug(f"String length before strip: {(original := len(string))}")
101
+ for split in (s for s in cls.head_split if s in string):
102
+ logger.debug(f"Strip head using {split}")
103
+ parts = string.split(split)
104
+ string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
105
+ break
106
+ logger.debug(
107
+ f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
108
+ )
109
+ if not d:
110
+ logger.warning("No decrease at head strip, which is might be abnormal.")
111
+ for split in (s for s in cls.tail_split if s in string):
112
+ logger.debug(f"Strip tail using {split}")
113
+ parts = string.split(split)
114
+ string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
115
+ break
116
+ logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
117
+ if not d:
118
+ logger.warning("No decrease at tail strip, which is might be abnormal.")
119
+
120
+ return string
@@ -1,8 +1,9 @@
1
1
  """ArticleEssence: Semantic fingerprint of academic paper for structured analysis."""
2
2
 
3
- from typing import List, Self
3
+ from typing import List
4
4
 
5
- from fabricatio.models.generic import Display, PersistentAble, ProposedAble, Vectorizable
5
+ from fabricatio.models.extra.rag import MilvusDataBase
6
+ from fabricatio.models.generic import PersistentAble, SketchedAble
6
7
  from pydantic import BaseModel
7
8
 
8
9
 
@@ -54,7 +55,7 @@ class Highlightings(BaseModel):
54
55
  """
55
56
 
56
57
 
57
- class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
58
+ class ArticleEssence(SketchedAble, PersistentAble, MilvusDataBase):
58
59
  """Structured representation of a scientific article's core elements in its original language."""
59
60
 
60
61
  language: str
@@ -93,7 +94,7 @@ class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
93
94
  bibtex_cite_key: str
94
95
  """Bibtex cite key of the original article."""
95
96
 
96
- def update_cite_key(self, new_cite_key: str) -> Self:
97
- """Update the bibtex_cite_key of the article."""
98
- self.bibtex_cite_key = new_cite_key
99
- return self
97
+ def _prepare_vectorization_inner(self) -> str:
98
+ return self.compact()
99
+
100
+
@@ -1,10 +1,13 @@
1
1
  """A module containing the RAG (Retrieval-Augmented Generation) models."""
2
2
 
3
- from abc import ABCMeta, abstractmethod
4
- from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Self, Sequence
3
+ from abc import ABC
4
+ from functools import partial
5
+ from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Self, Sequence, Set
5
6
 
6
7
  from fabricatio.decorators import precheck_package
7
- from pydantic import BaseModel, ConfigDict, JsonValue
8
+ from fabricatio.models.generic import Vectorizable
9
+ from fabricatio.utils import ok
10
+ from pydantic import JsonValue
8
11
 
9
12
  if TYPE_CHECKING:
10
13
  from importlib.util import find_spec
@@ -15,14 +18,18 @@ if TYPE_CHECKING:
15
18
  from pymilvus import CollectionSchema
16
19
 
17
20
 
18
- class MilvusDataBase(BaseModel, metaclass=ABCMeta):
21
+ class MilvusDataBase(Vectorizable, ABC):
19
22
  """A base class for Milvus data."""
20
23
 
21
- model_config = ConfigDict(use_attribute_docstrings=True)
22
-
23
24
  primary_field_name: ClassVar[str] = "id"
24
-
25
+ """The name of the primary field in Milvus."""
25
26
  vector_field_name: ClassVar[str] = "vector"
27
+ """The name of the vector field in Milvus."""
28
+
29
+ index_type: ClassVar[str] = "FLAT"
30
+ """The type of index to be used in Milvus."""
31
+ metric_type: ClassVar[str] = "COSINE"
32
+ """The type of metric to be used in Milvus."""
26
33
 
27
34
  def prepare_insertion(self, vector: List[float]) -> Dict[str, Any]:
28
35
  """Prepares the data for insertion into Milvus.
@@ -32,11 +39,6 @@ class MilvusDataBase(BaseModel, metaclass=ABCMeta):
32
39
  """
33
40
  return {**self.model_dump(exclude_none=True, by_alias=True), self.vector_field_name: vector}
34
41
 
35
- @property
36
- @abstractmethod
37
- def to_vectorize(self) -> str:
38
- """The text representation of the data."""
39
-
40
42
  @classmethod
41
43
  @precheck_package(
42
44
  "pymilvus", "pymilvus is not installed. Have you installed `fabricatio[rag]` instead of `fabricatio`?"
@@ -50,23 +52,47 @@ class MilvusDataBase(BaseModel, metaclass=ABCMeta):
50
52
  FieldSchema(cls.vector_field_name, dtype=DataType.FLOAT_VECTOR, dim=dimension),
51
53
  ]
52
54
 
53
- type_mapping = {
54
- str: DataType.STRING,
55
- int: DataType.INT64,
56
- float: DataType.DOUBLE,
57
- JsonValue: DataType.JSON,
58
- # TODO add more mapping
59
- }
60
-
61
55
  for k, v in cls.model_fields.items():
62
56
  k: str
63
57
  v: FieldInfo
64
- fields.append(
65
- FieldSchema(k, dtype=type_mapping.get(v.annotation, DataType.UNKNOWN), description=v.description or "")
66
- )
58
+ schema = partial(FieldSchema, k, description=v.description or "")
59
+ anno = ok(v.annotation)
60
+
61
+ if anno == int:
62
+ fields.append(schema(dtype=DataType.INT64))
63
+ elif anno == str:
64
+ fields.append(schema(dtype=DataType.VARCHAR, max_length=65535))
65
+ elif anno == float:
66
+ fields.append(schema(dtype=DataType.DOUBLE))
67
+ elif anno == list[str] or anno == List[str] or anno == set[str] or anno == Set[str]:
68
+ fields.append(
69
+ schema(dtype=DataType.ARRAY, element_type=DataType.VARCHAR, max_length=65535, max_capacity=4096)
70
+ )
71
+ elif anno == list[int] or anno == List[int] or anno == set[int] or anno == Set[int]:
72
+ fields.append(schema(dtype=DataType.ARRAY, element_type=DataType.INT64, max_capacity=4096))
73
+ elif anno == list[float] or anno == List[float] or anno == set[float] or anno == Set[float]:
74
+ fields.append(schema(dtype=DataType.ARRAY, element_type=DataType.DOUBLE, max_capacity=4096))
75
+ elif anno == JsonValue:
76
+ fields.append(schema(dtype=DataType.JSON))
77
+
78
+ else:
79
+ raise NotImplementedError(f"{k}:{anno} is not supported")
80
+
67
81
  return CollectionSchema(fields)
68
82
 
69
83
  @classmethod
70
84
  def from_sequence(cls, data: Sequence[Dict[str, Any]]) -> List[Self]:
71
85
  """Constructs a list of instances from a sequence of dictionaries."""
72
86
  return [cls(**d) for d in data]
87
+
88
+
89
+ class MilvusClassicModel(MilvusDataBase):
90
+ """A class representing a classic model stored in Milvus."""
91
+
92
+ text: str
93
+ """The text to be stored in Milvus."""
94
+ subject: str = ""
95
+ """The subject of the text."""
96
+
97
+ def _prepare_vectorization_inner(self) -> str:
98
+ return self.text
@@ -6,7 +6,6 @@ from pathlib import Path
6
6
  from typing import Any, Callable, Dict, Iterable, List, Optional, Self, Type, Union, final, overload
7
7
 
8
8
  import orjson
9
- import rtoml
10
9
  from fabricatio.config import configs
11
10
  from fabricatio.fs.readers import MAGIKA, safe_text_read
12
11
  from fabricatio.journal import logger
@@ -53,7 +52,7 @@ class Display(Base):
53
52
  Returns:
54
53
  str: JSON string with 1-level indentation for readability
55
54
  """
56
- return self.model_dump_json(indent=1,by_alias=True)
55
+ return self.model_dump_json(indent=1, by_alias=True)
57
56
 
58
57
  def compact(self) -> str:
59
58
  """Generate compact JSON representation.
@@ -225,7 +224,7 @@ class PersistentAble(Base):
225
224
  - Hash generated from JSON content ensures uniqueness
226
225
  """
227
226
  p = Path(path)
228
- out = self.model_dump_json(indent=1,by_alias=True)
227
+ out = self.model_dump_json(indent=1, by_alias=True)
229
228
 
230
229
  # Generate a timestamp in the format YYYYMMDD_HHMMSS
231
230
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -299,16 +298,18 @@ class Language(Base):
299
298
  """Class that provides a language attribute."""
300
299
 
301
300
  @property
302
- def language(self)->str:
301
+ def language(self) -> str:
303
302
  """Get the language of the object."""
304
- if isinstance(self,Described):
303
+ if isinstance(self, Described):
305
304
  return detect_language(self.description)
306
- if isinstance(self,Titled):
305
+ if isinstance(self, Titled):
307
306
  return detect_language(self.title)
308
- if isinstance(self,Named):
307
+ if isinstance(self, Named):
309
308
  return detect_language(self.name)
310
309
 
311
310
  return detect_language(self.model_dump_json(by_alias=True))
311
+
312
+
312
313
  class ModelHash(Base):
313
314
  """Class that provides a hash value for the object.
314
315
 
@@ -550,7 +551,7 @@ class FinalizedDumpAble(Base):
550
551
  Returns:
551
552
  str: The finalized dump of the object.
552
553
  """
553
- return self.model_dump_json(indent=1,by_alias=True)
554
+ return self.model_dump_json(indent=1, by_alias=True)
554
555
 
555
556
  def finalized_dump_to(self, path: str | Path) -> Self:
556
557
  """Finalize the dump of the object to a file.
@@ -662,8 +663,9 @@ class Vectorizable(Base):
662
663
  This class includes methods to prepare the model for vectorization, ensuring it fits within a specified token length.
663
664
  """
664
665
 
666
+ @abstractmethod
665
667
  def _prepare_vectorization_inner(self) -> str:
666
- return rtoml.dumps(self.model_dump())
668
+ """Prepare the model for vectorization."""
667
669
 
668
670
  @final
669
671
  def prepare_vectorization(self, max_length: Optional[int] = None) -> str:
@@ -681,8 +683,7 @@ class Vectorizable(Base):
681
683
  max_length = max_length or configs.embedding.max_sequence_length
682
684
  chunk = self._prepare_vectorization_inner()
683
685
  if max_length and (length := token_counter(text=chunk)) > max_length:
684
- logger.error(err := f"Chunk exceeds maximum sequence length {max_length}, got {length}, see {chunk}")
685
- raise ValueError(err)
686
+ raise ValueError(f"Chunk exceeds maximum sequence length {max_length}, got {length}, see \n{chunk}")
686
687
 
687
688
  return chunk
688
689
 
@@ -1,11 +1,18 @@
1
1
  """This module contains the types for the keyword arguments of the methods in the models module."""
2
2
 
3
- from typing import Any, Dict, List, Optional, Required, TypedDict
3
+ from typing import Any, Dict, List, NotRequired, Optional, Required, TypedDict
4
4
 
5
5
  from litellm.caching.caching import CacheMode
6
6
  from litellm.types.caching import CachingSupportedCallTypes
7
7
 
8
8
 
9
+ class ChunkKwargs(TypedDict):
10
+ """Configuration parameters for chunking operations."""
11
+
12
+ max_chunk_size: int
13
+ max_overlapping_rate: NotRequired[float]
14
+
15
+
9
16
  class EmbeddingKwargs(TypedDict, total=False):
10
17
  """Configuration parameters for text embedding operations.
11
18
 
Binary file
fabricatio/rust.pyi CHANGED
@@ -147,7 +147,7 @@ class BibManager:
147
147
  RuntimeError: If file cannot be read or parsed
148
148
  """
149
149
 
150
- def get_cite_key(self, title: str) -> Optional[str]:
150
+ def get_cite_key_by_title(self, title: str) -> Optional[str]:
151
151
  """Find citation key by exact title match.
152
152
 
153
153
  Args:
@@ -156,6 +156,15 @@ class BibManager:
156
156
  Returns:
157
157
  Citation key if exact match found, None otherwise
158
158
  """
159
+ def get_cite_key_by_title_fuzzy(self, title: str) -> Optional[str]:
160
+ """Find citation key by fuzzy title match.
161
+
162
+ Args:
163
+ title: Search term to find in bibliography entries
164
+
165
+ Returns:
166
+ Citation key of best matching entry, or None if no good match
167
+ """
159
168
 
160
169
  def get_cite_key_fuzzy(self, query: str) -> Optional[str]:
161
170
  """Find best matching citation using fuzzy text search.
fabricatio/utils.py CHANGED
@@ -25,7 +25,7 @@ async def ask_edit(
25
25
  return res
26
26
 
27
27
 
28
- def override_kwargs(kwargs: Mapping[str,Any], **overrides) -> Dict[str, Any]:
28
+ def override_kwargs(kwargs: Mapping[str, Any], **overrides) -> Dict[str, Any]:
29
29
  """Override the values in kwargs with the provided overrides."""
30
30
  new_kwargs = dict(kwargs.items())
31
31
  new_kwargs.update({k: v for k, v in overrides.items() if v is not None})
@@ -52,3 +52,16 @@ def ok[T](val: Optional[T], msg: str = "Value is None") -> T:
52
52
  if val is None:
53
53
  raise ValueError(msg)
54
54
  return val
55
+
56
+
57
+ def wrapp_in_block(string: str, title: str) -> str:
58
+ """Wraps a string in a block with a title.
59
+
60
+ Args:
61
+ string: The string to wrap.
62
+ title: The title of the block.
63
+
64
+ Returns:
65
+ str: The wrapped string.
66
+ """
67
+ return f"--- Start of {title} ---\n{string}\n--- End of {title} ---"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fabricatio
3
- Version: 0.2.10.dev0
3
+ Version: 0.2.10.dev1
4
4
  Classifier: License :: OSI Approved :: MIT License
5
5
  Classifier: Programming Language :: Rust
6
6
  Classifier: Programming Language :: Python :: 3.12
@@ -23,7 +23,6 @@ Requires-Dist: pymitter>=1.0.0
23
23
  Requires-Dist: questionary>=2.1.0
24
24
  Requires-Dist: regex>=2024.11.6
25
25
  Requires-Dist: rich>=13.9.4
26
- Requires-Dist: rtoml>=0.12.0
27
26
  Requires-Dist: pymilvus>=2.5.4 ; extra == 'rag'
28
27
  Requires-Dist: fabricatio[calc,plot,rag] ; extra == 'full'
29
28
  Requires-Dist: sympy>=1.13.3 ; extra == 'calc'
@@ -45,8 +44,6 @@ Project-URL: Issues, https://github.com/Whth/fabricatio/issues
45
44
  # Fabricatio
46
45
 
47
46
  ![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)
48
- ![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)
49
- ![Build Status](https://img.shields.io/badge/build-passing-brightgreen)
50
47
 
51
48
  ## Overview
52
49
 
@@ -1,10 +1,10 @@
1
- fabricatio-0.2.10.dev0.dist-info/METADATA,sha256=P8fqqWkcxcC1a42_I3GdmnP6qB8ZKsGoqyn4u-9yRT4,5289
2
- fabricatio-0.2.10.dev0.dist-info/WHEEL,sha256=jABKVkLC9kJr8mi_er5jOqpiQUjARSLXDUIIxDqsS50,96
3
- fabricatio-0.2.10.dev0.dist-info/licenses/LICENSE,sha256=do7J7EiCGbq0QPbMAL_FqLYufXpHnCnXBOuqVPwSV8Y,1088
4
- fabricatio/actions/article.py,sha256=C4t3hB5_k4dDrVuLzVTJIp3D6XyvvlRyGIov5-mNows,8984
5
- fabricatio/actions/article_rag.py,sha256=itGH-VCKTVFm7hrYIOOT4FyFXP8CbL042kpYNI9a2BE,4735
1
+ fabricatio-0.2.10.dev1.dist-info/METADATA,sha256=HRPFnRmPH19wYpcE1dJoL6Kltg2vewsF432CMSqV-Yg,5118
2
+ fabricatio-0.2.10.dev1.dist-info/WHEEL,sha256=jABKVkLC9kJr8mi_er5jOqpiQUjARSLXDUIIxDqsS50,96
3
+ fabricatio-0.2.10.dev1.dist-info/licenses/LICENSE,sha256=do7J7EiCGbq0QPbMAL_FqLYufXpHnCnXBOuqVPwSV8Y,1088
4
+ fabricatio/actions/article.py,sha256=0PE-b47WvBQpa4XPwc4sMe11GY8KO71N4pui_Yrnz_I,8993
5
+ fabricatio/actions/article_rag.py,sha256=79466dKS1TaT2rw5gadM1WfZoRJy07LmtoMXvfCZ2-U,5952
6
6
  fabricatio/actions/output.py,sha256=gkC2u_VpMJ6jOnbyRAJN24UVK7iDAMzhItYukaW8Spk,6498
7
- fabricatio/actions/rag.py,sha256=5nSih3YUkdt1uU02hSAMW6sADq9mkMOR1wDv7zIrIGQ,2737
7
+ fabricatio/actions/rag.py,sha256=9fM4oR5B4AJNhKmWfUlNIeF4QkUntQscICNVo_zWPSA,3580
8
8
  fabricatio/actions/rules.py,sha256=SNvAvQx4xUare16Za_dEpYlYI_PJNnbiO-E0XDa5JT4,2857
9
9
  fabricatio/actions/__init__.py,sha256=wVENCFtpVb1rLFxoOFJt9-8smLWXuJV7IwA8P3EfFz4,48
10
10
  fabricatio/capabilities/advanced_judge.py,sha256=selB0Gwf1F4gGJlwBiRo6gI4KOUROgh3WnzO3mZFEls,706
@@ -12,7 +12,7 @@ fabricatio/capabilities/censor.py,sha256=bBT5qy-kp7fh8g4Lz3labSwxwJ60gGd_vrkc6k1
12
12
  fabricatio/capabilities/check.py,sha256=kYqzohhv2bZfl1aKSUt7a8snT8YEl2zgha_ZdAdMMfQ,8622
13
13
  fabricatio/capabilities/correct.py,sha256=W_cInqlciNEhyMK0YI53jk4EvW9uAdge90IO9OElUmA,10420
14
14
  fabricatio/capabilities/propose.py,sha256=hkBeSlmcTdfYWT-ph6nlbtHXBozi_JXqXlWcnBy3W78,2007
15
- fabricatio/capabilities/rag.py,sha256=eWA4lDs6lnBFCK80H1JF68yOe7oScydQekXlBs2X0OI,9396
15
+ fabricatio/capabilities/rag.py,sha256=kqcunWBC6oA4P1rzIG2Xu9zqSg73H3uKPF41JJQ1HVI,9595
16
16
  fabricatio/capabilities/rating.py,sha256=Wt_H5fA1H4XuZGIMI8pr0cp_6jnXJABlo8lfU_4Fp5A,17645
17
17
  fabricatio/capabilities/review.py,sha256=-EMZe0ADFPT6fPGmra16UPjJC1M3rAs6dPFdTZ88Fgg,5060
18
18
  fabricatio/capabilities/task.py,sha256=JahC61X233UIPsjovxJgc_yqj_BjWZJBCzJZq11M2Xk,4417
@@ -26,37 +26,38 @@ fabricatio/fs/readers.py,sha256=M5kojKWsJQMQpE4CBbYvas0JKmPaiaYSfWmiqJx1SP4,1884
26
26
  fabricatio/fs/__init__.py,sha256=PCf0s_9KDjVfNw7AfPoJzGt3jMq4gJOfbcT4pb0D0ZY,588
27
27
  fabricatio/journal.py,sha256=stnEP88aUBA_GmU9gfTF2EZI8FS2OyMLGaMSTgK4QgA,476
28
28
  fabricatio/models/action.py,sha256=Kfa-zojgHQ1vPoC2lQp-thTTp0oySKn7k6I4ea6iYTs,9837
29
- fabricatio/models/adv_kwargs_types.py,sha256=YojZbB7m7VHA8woYnJpkLF4zPF2aYqv__SnCMK2cG-o,2180
29
+ fabricatio/models/adv_kwargs_types.py,sha256=kUO-SiZtFuz5cZCmMLnJJ9tjQ4-Zd_foo6R8HQMlM5A,1950
30
30
  fabricatio/models/events.py,sha256=wiirk_ASg3iXDOZU_gIimci1VZVzWE1nDmxy-hQVJ9M,4150
31
31
  fabricatio/models/extra/advanced_judge.py,sha256=INUl_41C8jkausDekkjnEmTwNfLCJ23TwFjq2cM23Cw,1092
32
+ fabricatio/models/extra/aricle_rag.py,sha256=I65Dcip3iibQdkACPF-EgYv7bSlpXB9oj8eq-R-Tjdc,4681
32
33
  fabricatio/models/extra/article_base.py,sha256=DxBex4UsMAFmHmriwXkcvGIuU-WTSD4ZfzDEk-no9TA,11894
33
- fabricatio/models/extra/article_essence.py,sha256=xd6j-PDqjhrMjgUmyfk6HqkyMLu-sS9feUo0sZ3QABY,2825
34
+ fabricatio/models/extra/article_essence.py,sha256=mlIkkRMR3I1RtqiiOnmIE3Vy623L4eECumkRzryE1pw,2749
34
35
  fabricatio/models/extra/article_main.py,sha256=zGzcf51abcWwiaX6iyi2V7upBLa-DBovnpTJj-qYLeA,7878
35
36
  fabricatio/models/extra/article_outline.py,sha256=w7O0SHgC7exbptWVbR62FMHAueMgBpyWKVYMGGl_oj8,1427
36
37
  fabricatio/models/extra/article_proposal.py,sha256=NbyjW-7UiFPtnVD9nte75re4xL2pD4qL29PpNV4Cg_M,1870
37
38
  fabricatio/models/extra/patches.py,sha256=_WNCxtYzzsVfUxI16vu4IqsLahLYRHdbQN9er9tqhC0,997
38
39
  fabricatio/models/extra/problem.py,sha256=zZEnjBW2XGRVpJpUp09f1J_w5A1zU-LhxX78AVCq9ts,7113
39
- fabricatio/models/extra/rag.py,sha256=_atfPwk3nzhiilAOC5H1WMBgTxcZvDn3qx0CVhETlZ8,2561
40
+ fabricatio/models/extra/rag.py,sha256=RMi8vhEPB0I5mVmjRLRLxYHUnm9pFhvVwysaIwmW2s0,3955
40
41
  fabricatio/models/extra/rule.py,sha256=KQQELVhCLUXhEZ35jU3WGYqKHuCYEAkn0p6pxAE-hOU,2625
41
42
  fabricatio/models/extra/__init__.py,sha256=XlYnS_2B9nhLhtQkjE7rvvfPmAAtXVdNi9bSDAR-Ge8,54
42
- fabricatio/models/generic.py,sha256=BS-K8Rd_1aLU9jIExtXScvzNZ-lsBCmHlaBCjNJ6g3s,30655
43
- fabricatio/models/kwargs_types.py,sha256=J4klrMVybkCWg8Fq4x27o5QSq8jmGg2XvneW66jI0Wc,4565
43
+ fabricatio/models/generic.py,sha256=M6K4uMSy4zKoTX5LyZFB8vXw8dTR9nZqec84eE-vPfw,30643
44
+ fabricatio/models/kwargs_types.py,sha256=r0fgI4ExuAc0MMsgWs8fAyaQ9Z_PRRAKTr53pPP5JYY,4747
44
45
  fabricatio/models/role.py,sha256=-CRcj5_M3_ciLPzwiNn92grBmwoSLQ-n4koVZiCNTBM,2953
45
46
  fabricatio/models/task.py,sha256=SxWI-b5jlQcGmNsjQ2aKDyywXwGiUvCR1rgUhk-pli8,10503
46
47
  fabricatio/models/tool.py,sha256=jQ51g4lwTPfsMF1nbreDJtBczbxIHoXcPuLSOqHliq8,12506
47
48
  fabricatio/models/usages.py,sha256=VLBpNs7zfNPqROvI2IXlqsoqKYSW8L6usNwZ1HXZVOY,34339
48
49
  fabricatio/parser.py,sha256=qN2godNsArmb90btOMxgqlol57166DyYsV2JlU8DlHs,6532
49
50
  fabricatio/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- fabricatio/rust.pyi,sha256=vSItFXKj7YG6b7gmObMo99rWztsiYj4Ji124UNJbhd0,6957
51
+ fabricatio/rust.pyi,sha256=uVHcjDkG4gPcWX_7pxJXHroamY6Db46tQci96THbwJs,7280
51
52
  fabricatio/rust_instances.py,sha256=Byeo8KHW_dJiXujJq7YPGDLBX5bHNDYbBc4sY3uubVY,313
52
53
  fabricatio/toolboxes/arithmetic.py,sha256=WLqhY-Pikv11Y_0SGajwZx3WhsLNpHKf9drzAqOf_nY,1369
53
54
  fabricatio/toolboxes/fs.py,sha256=l4L1CVxJmjw9Ld2XUpIlWfV0_Fu_2Og6d3E13I-S4aE,736
54
55
  fabricatio/toolboxes/__init__.py,sha256=KBJi5OG_pExscdlM7Bnt_UF43j4I3Lv6G71kPVu4KQU,395
55
- fabricatio/utils.py,sha256=uy-W5b1d8oM1UTk2IT1lLGKIn_Pmo3XU5xbahjyDESE,1710
56
+ fabricatio/utils.py,sha256=PKb2yfAe7iRwGJklLB5uZWuWhT0Tm47iHAqPo-zl5CQ,2039
56
57
  fabricatio/workflows/articles.py,sha256=ObYTFUqLUk_CzdmmnX6S7APfxcGmPFqnFr9pdjU7Z4Y,969
57
58
  fabricatio/workflows/rag.py,sha256=-YYp2tlE9Vtfgpg6ROpu6QVO8j8yVSPa6yDzlN3qVxs,520
58
59
  fabricatio/workflows/__init__.py,sha256=5ScFSTA-bvhCesj3U9Mnmi6Law6N1fmh5UKyh58L3u8,51
59
60
  fabricatio/__init__.py,sha256=Rmvq2VgdS2u68vnOi2i5RbeWbAwrJDbk8D8D883PJWE,1022
60
- fabricatio/rust.cp312-win_amd64.pyd,sha256=pgavk4szzu7Fdb8oC-IK3XmhB9upqwxJxkMG_Ep65eQ,2235904
61
- fabricatio-0.2.10.dev0.data/scripts/tdown.exe,sha256=hH6MCz4SZWxYNzpNzlZ3KzTM2-H_wFyxMrKie4K24Go,3364864
62
- fabricatio-0.2.10.dev0.dist-info/RECORD,,
61
+ fabricatio/rust.cp312-win_amd64.pyd,sha256=ql93jn1qacym6Ks927dxEGJb16rUyWPiW85fm9IE8A0,2251776
62
+ fabricatio-0.2.10.dev1.data/scripts/tdown.exe,sha256=WFQ7z3utWNkccmrzZPzJTb4N0_IBWrjirdWSOKcrj_0,3365888
63
+ fabricatio-0.2.10.dev1.dist-info/RECORD,,