fabricatio 0.2.10.dev0__cp312-cp312-win_amd64.whl → 0.2.10.dev1__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/actions/article.py +2 -2
- fabricatio/actions/article_rag.py +33 -2
- fabricatio/actions/rag.py +40 -18
- fabricatio/capabilities/rag.py +5 -2
- fabricatio/models/adv_kwargs_types.py +5 -12
- fabricatio/models/extra/aricle_rag.py +120 -0
- fabricatio/models/extra/article_essence.py +8 -7
- fabricatio/models/extra/rag.py +49 -23
- fabricatio/models/generic.py +12 -11
- fabricatio/models/kwargs_types.py +8 -1
- fabricatio/rust.cp312-win_amd64.pyd +0 -0
- fabricatio/rust.pyi +10 -1
- fabricatio/utils.py +14 -1
- {fabricatio-0.2.10.dev0.data → fabricatio-0.2.10.dev1.data}/scripts/tdown.exe +0 -0
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.10.dev1.dist-info}/METADATA +1 -4
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.10.dev1.dist-info}/RECORD +18 -17
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.10.dev1.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.10.dev1.dist-info}/licenses/LICENSE +0 -0
fabricatio/actions/article.py
CHANGED
@@ -4,6 +4,7 @@ from asyncio import gather
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import Callable, List, Optional
|
6
6
|
|
7
|
+
from fabricatio.rust import BibManager, detect_language
|
7
8
|
from more_itertools import filter_map
|
8
9
|
|
9
10
|
from fabricatio.capabilities.censor import Censor
|
@@ -17,7 +18,6 @@ from fabricatio.models.extra.article_outline import ArticleOutline
|
|
17
18
|
from fabricatio.models.extra.article_proposal import ArticleProposal
|
18
19
|
from fabricatio.models.extra.rule import RuleSet
|
19
20
|
from fabricatio.models.task import Task
|
20
|
-
from fabricatio.rust import BibManager, detect_language
|
21
21
|
from fabricatio.utils import ok
|
22
22
|
|
23
23
|
|
@@ -78,7 +78,7 @@ class FixArticleEssence(Action):
|
|
78
78
|
out = []
|
79
79
|
count = 0
|
80
80
|
for a in article_essence:
|
81
|
-
if key := (bib_mgr.
|
81
|
+
if key := (bib_mgr.get_cite_key_by_title(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
|
82
82
|
a.title = bib_mgr.get_title_by_key(key) or a.title
|
83
83
|
a.authors = bib_mgr.get_author_by_key(key) or a.authors
|
84
84
|
a.publication_year = bib_mgr.get_year_by_key(key) or a.publication_year
|
fabricatio/actions/article_rag.py
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
"""A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
|
2
2
|
|
3
3
|
from asyncio import gather
|
4
|
-
from
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import List, Optional
|
5
6
|
|
7
|
+
from fabricatio import BibManager
|
6
8
|
from fabricatio.capabilities.censor import Censor
|
7
9
|
from fabricatio.capabilities.rag import RAG
|
8
10
|
from fabricatio.models.action import Action
|
11
|
+
from fabricatio.models.extra.aricle_rag import ArticleChunk
|
12
|
+
from fabricatio.models.extra.article_essence import ArticleEssence
|
9
13
|
from fabricatio.models.extra.article_main import Article, ArticleSubsection
|
10
14
|
from fabricatio.models.extra.rule import RuleSet
|
11
15
|
from fabricatio.utils import ok
|
@@ -97,9 +101,36 @@ class TweakArticleRAG(Action, RAG, Censor):
|
|
97
101
|
await self.censor_obj_inplace(
|
98
102
|
subsec,
|
99
103
|
ruleset=ruleset,
|
100
|
-
reference=f"{await self.
|
104
|
+
reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, final_limit=self.ref_limit))}\n\n"
|
101
105
|
f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
|
102
106
|
f"You should Always use `{subsec.language}` as written language, "
|
103
107
|
f"which is the original language of the `{subsec.title}`. "
|
104
108
|
f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
|
105
109
|
)
|
110
|
+
|
111
|
+
|
112
|
+
class ChunkArticle(Action):
|
113
|
+
"""Chunk an article into smaller chunks."""
|
114
|
+
|
115
|
+
output_key:str = "article_chunks"
|
116
|
+
"""The key used to store the output of the action."""
|
117
|
+
max_chunk_size: Optional[int] = None
|
118
|
+
"""The maximum size of each chunk."""
|
119
|
+
max_overlapping_rate: Optional[float] = None
|
120
|
+
"""The maximum overlapping rate between chunks."""
|
121
|
+
|
122
|
+
async def _execute(
|
123
|
+
self,
|
124
|
+
article_path: str | Path,
|
125
|
+
bib_manager: BibManager,
|
126
|
+
max_chunk_size: Optional[int] = None,
|
127
|
+
max_overlapping_rate: Optional[float] = None,
|
128
|
+
**_,
|
129
|
+
) -> List[ArticleChunk]:
|
130
|
+
return ArticleChunk.from_file(
|
131
|
+
article_path,
|
132
|
+
bib_manager,
|
133
|
+
max_chunk_size=ok(max_chunk_size or self.max_chunk_size, "No max_chunk_size provided!"),
|
134
|
+
max_overlapping_rate=ok(max_overlapping_rate or self.max_overlapping_rate, "No max_overlapping_rate provided!"),
|
135
|
+
)
|
136
|
+
|
fabricatio/actions/rag.py
CHANGED
@@ -5,34 +5,56 @@ from typing import List, Optional
|
|
5
5
|
from questionary import text
|
6
6
|
|
7
7
|
from fabricatio.capabilities.rag import RAG
|
8
|
+
from fabricatio.config import configs
|
8
9
|
from fabricatio.journal import logger
|
9
10
|
from fabricatio.models.action import Action
|
10
|
-
from fabricatio.models.
|
11
|
+
from fabricatio.models.extra.rag import MilvusClassicModel, MilvusDataBase
|
11
12
|
from fabricatio.models.task import Task
|
13
|
+
from fabricatio.utils import ok
|
12
14
|
|
13
15
|
|
14
16
|
class InjectToDB(Action, RAG):
|
15
17
|
"""Inject data into the database."""
|
16
18
|
|
17
19
|
output_key: str = "collection_name"
|
20
|
+
collection_name: str = "my_collection"
|
21
|
+
"""The name of the collection to inject data into."""
|
18
22
|
|
19
|
-
async def _execute[T:
|
20
|
-
self, to_inject: Optional[T] | List[Optional[T]],
|
23
|
+
async def _execute[T: MilvusDataBase](
|
24
|
+
self, to_inject: Optional[T] | List[Optional[T]], override_inject: bool = False, **_
|
21
25
|
) -> Optional[str]:
|
26
|
+
from pymilvus.milvus_client import IndexParams
|
27
|
+
|
28
|
+
if to_inject is None:
|
29
|
+
return None
|
22
30
|
if not isinstance(to_inject, list):
|
23
31
|
to_inject = [to_inject]
|
24
|
-
|
32
|
+
if not (seq := [t for t in to_inject if t is not None]): # filter out None
|
33
|
+
return None
|
34
|
+
logger.info(f"Injecting {len(seq)} items into the collection '{self.collection_name}'")
|
25
35
|
if override_inject:
|
26
|
-
self.check_client().client.drop_collection(collection_name)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
+
self.check_client().client.drop_collection(self.collection_name)
|
37
|
+
|
38
|
+
await self.view(
|
39
|
+
self.collection_name,
|
40
|
+
create=True,
|
41
|
+
schema=seq[0].as_milvus_schema(
|
42
|
+
ok(
|
43
|
+
self.milvus_dimensions
|
44
|
+
or configs.rag.milvus_dimensions
|
45
|
+
or self.embedding_dimensions
|
46
|
+
or configs.embedding.dimensions
|
47
|
+
),
|
48
|
+
),
|
49
|
+
index_params=IndexParams(
|
50
|
+
seq[0].vector_field_name,
|
51
|
+
index_name=seq[0].vector_field_name,
|
52
|
+
index_type=seq[0].index_type,
|
53
|
+
metric_type=seq[0].metric_type,
|
54
|
+
),
|
55
|
+
).add_document(seq, flush=True)
|
56
|
+
|
57
|
+
return self.collection_name
|
36
58
|
|
37
59
|
|
38
60
|
class RAGTalk(Action, RAG):
|
@@ -62,10 +84,10 @@ class RAGTalk(Action, RAG):
|
|
62
84
|
user_say = await text("User: ").ask_async()
|
63
85
|
if user_say is None:
|
64
86
|
break
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
87
|
+
ret: List[MilvusClassicModel] = await self.aretrieve(user_say, document_model=MilvusClassicModel)
|
88
|
+
|
89
|
+
gpt_say = await self.aask(
|
90
|
+
user_say, system_message="\n".join(m.text for m in ret) + "\nYou can refer facts provided above."
|
69
91
|
)
|
70
92
|
print(f"GPT: {gpt_say}") # noqa: T201
|
71
93
|
counter += 1
|
fabricatio/capabilities/rag.py
CHANGED
@@ -130,7 +130,7 @@ class RAG(EmbeddingUsage):
|
|
130
130
|
if isinstance(data, MilvusDataBase):
|
131
131
|
data = [data]
|
132
132
|
|
133
|
-
data_vec = await self.vectorize([d.to_vectorize for d in data])
|
133
|
+
data_vec = await self.vectorize([d.prepare_vectorization() for d in data])
|
134
134
|
prepared_data = [d.prepare_insertion(vec) for d, vec in zip(data, data_vec, strict=True)]
|
135
135
|
|
136
136
|
c_name = collection_name or self.safe_target_collection
|
@@ -188,13 +188,15 @@ class RAG(EmbeddingUsage):
|
|
188
188
|
async def aretrieve[D: MilvusDataBase](
|
189
189
|
self,
|
190
190
|
query: List[str] | str,
|
191
|
+
document_model: Type[D],
|
191
192
|
final_limit: int = 20,
|
192
|
-
**kwargs: Unpack[FetchKwargs
|
193
|
+
**kwargs: Unpack[FetchKwargs],
|
193
194
|
) -> List[D]:
|
194
195
|
"""Retrieve data from the collection.
|
195
196
|
|
196
197
|
Args:
|
197
198
|
query (List[str] | str): The query to be used for retrieval.
|
199
|
+
document_model (Type[D]): The model class used to convert retrieved data into document objects.
|
198
200
|
final_limit (int): The final limit on the number of results to return.
|
199
201
|
**kwargs (Unpack[FetchKwargs]): Additional keyword arguments for retrieval.
|
200
202
|
|
@@ -206,6 +208,7 @@ class RAG(EmbeddingUsage):
|
|
206
208
|
return (
|
207
209
|
await self.afetch_document(
|
208
210
|
vecs=(await self.vectorize(query)),
|
211
|
+
document_model=document_model,
|
209
212
|
**kwargs,
|
210
213
|
)
|
211
214
|
)[:final_limit]
|
fabricatio/models/adv_kwargs_types.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
"""A module containing kwargs types for content correction and checking operations."""
|
2
2
|
|
3
3
|
from importlib.util import find_spec
|
4
|
-
from typing import
|
4
|
+
from typing import NotRequired, TypedDict
|
5
5
|
|
6
6
|
from fabricatio.models.extra.problem import Improvement
|
7
|
-
from fabricatio.models.extra.rag import MilvusDataBase
|
8
7
|
from fabricatio.models.extra.rule import RuleSet
|
9
8
|
from fabricatio.models.generic import SketchedAble
|
10
9
|
from fabricatio.models.kwargs_types import ReferencedKwargs
|
@@ -49,19 +48,13 @@ if find_spec("pymilvus"):
|
|
49
48
|
schema: CollectionSchema | None
|
50
49
|
index_params: IndexParams | None
|
51
50
|
|
52
|
-
class FetchKwargs
|
51
|
+
class FetchKwargs(TypedDict):
|
53
52
|
"""Arguments for fetching data from vector collections.
|
54
53
|
|
55
54
|
Controls how data is retrieved from vector databases, including filtering
|
56
55
|
and result limiting parameters.
|
57
56
|
"""
|
58
57
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
result_per_query: int
|
63
|
-
|
64
|
-
class RetrievalKwargs(FetchKwargs, total=False):
|
65
|
-
"""Arguments for retrieval operations."""
|
66
|
-
|
67
|
-
final_limit: int
|
58
|
+
collection_name: NotRequired[str | None]
|
59
|
+
similarity_threshold: NotRequired[float]
|
60
|
+
result_per_query: NotRequired[int]
|
fabricatio/models/extra/aricle_rag.py
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
"""A Module containing the article rag models."""
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import ClassVar, Dict, List, Self, Unpack
|
5
|
+
|
6
|
+
from fabricatio.fs import safe_text_read
|
7
|
+
from fabricatio.journal import logger
|
8
|
+
from fabricatio.models.extra.rag import MilvusDataBase
|
9
|
+
from fabricatio.models.generic import AsPrompt
|
10
|
+
from fabricatio.models.kwargs_types import ChunkKwargs
|
11
|
+
from fabricatio.rust import BibManager, split_into_chunks
|
12
|
+
from fabricatio.utils import ok, wrapp_in_block
|
13
|
+
from more_itertools.recipes import flatten
|
14
|
+
from pydantic import Field
|
15
|
+
|
16
|
+
|
17
|
+
class ArticleChunk(MilvusDataBase, AsPrompt):
|
18
|
+
"""The chunk of an article."""
|
19
|
+
|
20
|
+
head_split: ClassVar[List[str]] = [
|
21
|
+
"引 言",
|
22
|
+
"引言",
|
23
|
+
"绪 论",
|
24
|
+
"绪论",
|
25
|
+
"前言",
|
26
|
+
"INTRODUCTION",
|
27
|
+
"Introduction",
|
28
|
+
]
|
29
|
+
tail_split: ClassVar[List[str]] = [
|
30
|
+
"参 考 文 献",
|
31
|
+
"参 考 文 献",
|
32
|
+
"参考文献",
|
33
|
+
"REFERENCES",
|
34
|
+
"References",
|
35
|
+
"Bibliography",
|
36
|
+
"Reference",
|
37
|
+
]
|
38
|
+
chunk: str
|
39
|
+
"""The segment of the article"""
|
40
|
+
year: int
|
41
|
+
"""The year of the article"""
|
42
|
+
authors: List[str] = Field(default_factory=list)
|
43
|
+
"""The authors of the article"""
|
44
|
+
article_title: str
|
45
|
+
"""The title of the article"""
|
46
|
+
bibtex_cite_key: str
|
47
|
+
"""The bibtex cite key of the article"""
|
48
|
+
|
49
|
+
def _as_prompt_inner(self) -> Dict[str, str]:
|
50
|
+
return {
|
51
|
+
self.article_title: f"{wrapp_in_block(self.chunk, 'Referring Content')}\n"
|
52
|
+
f"Authors: {';'.join(self.authors)}\n"
|
53
|
+
f"Published Year: {self.year}\n"
|
54
|
+
f"Bibtex Key: {self.bibtex_cite_key}\n",
|
55
|
+
}
|
56
|
+
|
57
|
+
def _prepare_vectorization_inner(self) -> str:
|
58
|
+
return self.chunk
|
59
|
+
|
60
|
+
@classmethod
|
61
|
+
def from_file[P: str | Path](
|
62
|
+
cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
|
63
|
+
) -> List[Self]:
|
64
|
+
"""Load the article chunks from the file."""
|
65
|
+
if isinstance(path, list):
|
66
|
+
result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
|
67
|
+
logger.debug(f"Number of chunks created from list of files: {len(result)}")
|
68
|
+
return result
|
69
|
+
|
70
|
+
return cls._from_file_inner(path, bib_mgr, **kwargs)
|
71
|
+
|
72
|
+
@classmethod
|
73
|
+
def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
|
74
|
+
path = Path(path)
|
75
|
+
|
76
|
+
title_seg = path.stem.split(" - ").pop()
|
77
|
+
|
78
|
+
key = (
|
79
|
+
bib_mgr.get_cite_key_by_title(title_seg)
|
80
|
+
or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
|
81
|
+
or bib_mgr.get_cite_key_fuzzy(path.stem)
|
82
|
+
)
|
83
|
+
if key is None:
|
84
|
+
logger.warning(f"no cite key found for {path.as_posix()}, skip.")
|
85
|
+
return []
|
86
|
+
authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
|
87
|
+
year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
|
88
|
+
article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
|
89
|
+
|
90
|
+
result = [
|
91
|
+
cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
|
92
|
+
for c in split_into_chunks(cls.strip(safe_text_read(path)), **kwargs)
|
93
|
+
]
|
94
|
+
logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
|
95
|
+
return result
|
96
|
+
|
97
|
+
@classmethod
|
98
|
+
def strip(cls, string: str) -> str:
|
99
|
+
"""Strip the head and tail of the string."""
|
100
|
+
logger.debug(f"String length before strip: {(original := len(string))}")
|
101
|
+
for split in (s for s in cls.head_split if s in string):
|
102
|
+
logger.debug(f"Strip head using {split}")
|
103
|
+
parts = string.split(split)
|
104
|
+
string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
|
105
|
+
break
|
106
|
+
logger.debug(
|
107
|
+
f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
|
108
|
+
)
|
109
|
+
if not d:
|
110
|
+
logger.warning("No decrease at head strip, which is might be abnormal.")
|
111
|
+
for split in (s for s in cls.tail_split if s in string):
|
112
|
+
logger.debug(f"Strip tail using {split}")
|
113
|
+
parts = string.split(split)
|
114
|
+
string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
|
115
|
+
break
|
116
|
+
logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
|
117
|
+
if not d:
|
118
|
+
logger.warning("No decrease at tail strip, which is might be abnormal.")
|
119
|
+
|
120
|
+
return string
|
fabricatio/models/extra/article_essence.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
"""ArticleEssence: Semantic fingerprint of academic paper for structured analysis."""
|
2
2
|
|
3
|
-
from typing import List
|
3
|
+
from typing import List
|
4
4
|
|
5
|
-
from fabricatio.models.
|
5
|
+
from fabricatio.models.extra.rag import MilvusDataBase
|
6
|
+
from fabricatio.models.generic import PersistentAble, SketchedAble
|
6
7
|
from pydantic import BaseModel
|
7
8
|
|
8
9
|
|
@@ -54,7 +55,7 @@ class Highlightings(BaseModel):
|
|
54
55
|
"""
|
55
56
|
|
56
57
|
|
57
|
-
class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
|
58
|
+
class ArticleEssence(SketchedAble, PersistentAble, MilvusDataBase):
|
58
59
|
"""Structured representation of a scientific article's core elements in its original language."""
|
59
60
|
|
60
61
|
language: str
|
@@ -93,7 +94,7 @@ class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
|
|
93
94
|
bibtex_cite_key: str
|
94
95
|
"""Bibtex cite key of the original article."""
|
95
96
|
|
96
|
-
def
|
97
|
-
|
98
|
-
|
99
|
-
|
97
|
+
def _prepare_vectorization_inner(self) -> str:
|
98
|
+
return self.compact()
|
99
|
+
|
100
|
+
|
fabricatio/models/extra/rag.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
"""A module containing the RAG (Retrieval-Augmented Generation) models."""
|
2
2
|
|
3
|
-
from abc import ABCMeta, abstractmethod
|
4
|
-
from
|
3
|
+
from abc import ABC
|
4
|
+
from functools import partial
|
5
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Self, Sequence, Set
|
5
6
|
|
6
7
|
from fabricatio.decorators import precheck_package
|
7
|
-
from
|
8
|
+
from fabricatio.models.generic import Vectorizable
|
9
|
+
from fabricatio.utils import ok
|
10
|
+
from pydantic import JsonValue
|
8
11
|
|
9
12
|
if TYPE_CHECKING:
|
10
13
|
from importlib.util import find_spec
|
@@ -15,14 +18,18 @@ if TYPE_CHECKING:
|
|
15
18
|
from pymilvus import CollectionSchema
|
16
19
|
|
17
20
|
|
18
|
-
class MilvusDataBase(BaseModel, metaclass=ABCMeta):
|
21
|
+
class MilvusDataBase(Vectorizable, ABC):
|
19
22
|
"""A base class for Milvus data."""
|
20
23
|
|
21
|
-
model_config = ConfigDict(use_attribute_docstrings=True)
|
22
|
-
|
23
24
|
primary_field_name: ClassVar[str] = "id"
|
24
|
-
|
25
|
+
"""The name of the primary field in Milvus."""
|
25
26
|
vector_field_name: ClassVar[str] = "vector"
|
27
|
+
"""The name of the vector field in Milvus."""
|
28
|
+
|
29
|
+
index_type: ClassVar[str] = "FLAT"
|
30
|
+
"""The type of index to be used in Milvus."""
|
31
|
+
metric_type: ClassVar[str] = "COSINE"
|
32
|
+
"""The type of metric to be used in Milvus."""
|
26
33
|
|
27
34
|
def prepare_insertion(self, vector: List[float]) -> Dict[str, Any]:
|
28
35
|
"""Prepares the data for insertion into Milvus.
|
@@ -32,11 +39,6 @@ class MilvusDataBase(BaseModel, metaclass=ABCMeta):
|
|
32
39
|
"""
|
33
40
|
return {**self.model_dump(exclude_none=True, by_alias=True), self.vector_field_name: vector}
|
34
41
|
|
35
|
-
@property
|
36
|
-
@abstractmethod
|
37
|
-
def to_vectorize(self) -> str:
|
38
|
-
"""The text representation of the data."""
|
39
|
-
|
40
42
|
@classmethod
|
41
43
|
@precheck_package(
|
42
44
|
"pymilvus", "pymilvus is not installed. Have you installed `fabricatio[rag]` instead of `fabricatio`?"
|
@@ -50,23 +52,47 @@ class MilvusDataBase(BaseModel, metaclass=ABCMeta):
|
|
50
52
|
FieldSchema(cls.vector_field_name, dtype=DataType.FLOAT_VECTOR, dim=dimension),
|
51
53
|
]
|
52
54
|
|
53
|
-
type_mapping = {
|
54
|
-
str: DataType.STRING,
|
55
|
-
int: DataType.INT64,
|
56
|
-
float: DataType.DOUBLE,
|
57
|
-
JsonValue: DataType.JSON,
|
58
|
-
# TODO add more mapping
|
59
|
-
}
|
60
|
-
|
61
55
|
for k, v in cls.model_fields.items():
|
62
56
|
k: str
|
63
57
|
v: FieldInfo
|
64
|
-
|
65
|
-
|
66
|
-
|
58
|
+
schema = partial(FieldSchema, k, description=v.description or "")
|
59
|
+
anno = ok(v.annotation)
|
60
|
+
|
61
|
+
if anno == int:
|
62
|
+
fields.append(schema(dtype=DataType.INT64))
|
63
|
+
elif anno == str:
|
64
|
+
fields.append(schema(dtype=DataType.VARCHAR, max_length=65535))
|
65
|
+
elif anno == float:
|
66
|
+
fields.append(schema(dtype=DataType.DOUBLE))
|
67
|
+
elif anno == list[str] or anno == List[str] or anno == set[str] or anno == Set[str]:
|
68
|
+
fields.append(
|
69
|
+
schema(dtype=DataType.ARRAY, element_type=DataType.VARCHAR, max_length=65535, max_capacity=4096)
|
70
|
+
)
|
71
|
+
elif anno == list[int] or anno == List[int] or anno == set[int] or anno == Set[int]:
|
72
|
+
fields.append(schema(dtype=DataType.ARRAY, element_type=DataType.INT64, max_capacity=4096))
|
73
|
+
elif anno == list[float] or anno == List[float] or anno == set[float] or anno == Set[float]:
|
74
|
+
fields.append(schema(dtype=DataType.ARRAY, element_type=DataType.DOUBLE, max_capacity=4096))
|
75
|
+
elif anno == JsonValue:
|
76
|
+
fields.append(schema(dtype=DataType.JSON))
|
77
|
+
|
78
|
+
else:
|
79
|
+
raise NotImplementedError(f"{k}:{anno} is not supported")
|
80
|
+
|
67
81
|
return CollectionSchema(fields)
|
68
82
|
|
69
83
|
@classmethod
|
70
84
|
def from_sequence(cls, data: Sequence[Dict[str, Any]]) -> List[Self]:
|
71
85
|
"""Constructs a list of instances from a sequence of dictionaries."""
|
72
86
|
return [cls(**d) for d in data]
|
87
|
+
|
88
|
+
|
89
|
+
class MilvusClassicModel(MilvusDataBase):
|
90
|
+
"""A class representing a classic model stored in Milvus."""
|
91
|
+
|
92
|
+
text: str
|
93
|
+
"""The text to be stored in Milvus."""
|
94
|
+
subject: str = ""
|
95
|
+
"""The subject of the text."""
|
96
|
+
|
97
|
+
def _prepare_vectorization_inner(self) -> str:
|
98
|
+
return self.text
|
fabricatio/models/generic.py
CHANGED
@@ -6,7 +6,6 @@ from pathlib import Path
|
|
6
6
|
from typing import Any, Callable, Dict, Iterable, List, Optional, Self, Type, Union, final, overload
|
7
7
|
|
8
8
|
import orjson
|
9
|
-
import rtoml
|
10
9
|
from fabricatio.config import configs
|
11
10
|
from fabricatio.fs.readers import MAGIKA, safe_text_read
|
12
11
|
from fabricatio.journal import logger
|
@@ -53,7 +52,7 @@ class Display(Base):
|
|
53
52
|
Returns:
|
54
53
|
str: JSON string with 1-level indentation for readability
|
55
54
|
"""
|
56
|
-
return self.model_dump_json(indent=1,by_alias=True)
|
55
|
+
return self.model_dump_json(indent=1, by_alias=True)
|
57
56
|
|
58
57
|
def compact(self) -> str:
|
59
58
|
"""Generate compact JSON representation.
|
@@ -225,7 +224,7 @@ class PersistentAble(Base):
|
|
225
224
|
- Hash generated from JSON content ensures uniqueness
|
226
225
|
"""
|
227
226
|
p = Path(path)
|
228
|
-
out = self.model_dump_json(indent=1,by_alias=True)
|
227
|
+
out = self.model_dump_json(indent=1, by_alias=True)
|
229
228
|
|
230
229
|
# Generate a timestamp in the format YYYYMMDD_HHMMSS
|
231
230
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
@@ -299,16 +298,18 @@ class Language(Base):
|
|
299
298
|
"""Class that provides a language attribute."""
|
300
299
|
|
301
300
|
@property
|
302
|
-
def language(self)->str:
|
301
|
+
def language(self) -> str:
|
303
302
|
"""Get the language of the object."""
|
304
|
-
if isinstance(self,Described):
|
303
|
+
if isinstance(self, Described):
|
305
304
|
return detect_language(self.description)
|
306
|
-
if isinstance(self,Titled):
|
305
|
+
if isinstance(self, Titled):
|
307
306
|
return detect_language(self.title)
|
308
|
-
if isinstance(self,Named):
|
307
|
+
if isinstance(self, Named):
|
309
308
|
return detect_language(self.name)
|
310
309
|
|
311
310
|
return detect_language(self.model_dump_json(by_alias=True))
|
311
|
+
|
312
|
+
|
312
313
|
class ModelHash(Base):
|
313
314
|
"""Class that provides a hash value for the object.
|
314
315
|
|
@@ -550,7 +551,7 @@ class FinalizedDumpAble(Base):
|
|
550
551
|
Returns:
|
551
552
|
str: The finalized dump of the object.
|
552
553
|
"""
|
553
|
-
return self.model_dump_json(indent=1,by_alias=True)
|
554
|
+
return self.model_dump_json(indent=1, by_alias=True)
|
554
555
|
|
555
556
|
def finalized_dump_to(self, path: str | Path) -> Self:
|
556
557
|
"""Finalize the dump of the object to a file.
|
@@ -662,8 +663,9 @@ class Vectorizable(Base):
|
|
662
663
|
This class includes methods to prepare the model for vectorization, ensuring it fits within a specified token length.
|
663
664
|
"""
|
664
665
|
|
666
|
+
@abstractmethod
|
665
667
|
def _prepare_vectorization_inner(self) -> str:
|
666
|
-
|
668
|
+
"""Prepare the model for vectorization."""
|
667
669
|
|
668
670
|
@final
|
669
671
|
def prepare_vectorization(self, max_length: Optional[int] = None) -> str:
|
@@ -681,8 +683,7 @@ class Vectorizable(Base):
|
|
681
683
|
max_length = max_length or configs.embedding.max_sequence_length
|
682
684
|
chunk = self._prepare_vectorization_inner()
|
683
685
|
if max_length and (length := token_counter(text=chunk)) > max_length:
|
684
|
-
|
685
|
-
raise ValueError(err)
|
686
|
+
raise ValueError(f"Chunk exceeds maximum sequence length {max_length}, got {length}, see \n{chunk}")
|
686
687
|
|
687
688
|
return chunk
|
688
689
|
|
fabricatio/models/kwargs_types.py
CHANGED
@@ -1,11 +1,18 @@
|
|
1
1
|
"""This module contains the types for the keyword arguments of the methods in the models module."""
|
2
2
|
|
3
|
-
from typing import Any, Dict, List, Optional, Required, TypedDict
|
3
|
+
from typing import Any, Dict, List, NotRequired, Optional, Required, TypedDict
|
4
4
|
|
5
5
|
from litellm.caching.caching import CacheMode
|
6
6
|
from litellm.types.caching import CachingSupportedCallTypes
|
7
7
|
|
8
8
|
|
9
|
+
class ChunkKwargs(TypedDict):
|
10
|
+
"""Configuration parameters for chunking operations."""
|
11
|
+
|
12
|
+
max_chunk_size: int
|
13
|
+
max_overlapping_rate: NotRequired[float]
|
14
|
+
|
15
|
+
|
9
16
|
class EmbeddingKwargs(TypedDict, total=False):
|
10
17
|
"""Configuration parameters for text embedding operations.
|
11
18
|
|
Binary file
|
fabricatio/rust.pyi
CHANGED
@@ -147,7 +147,7 @@ class BibManager:
|
|
147
147
|
RuntimeError: If file cannot be read or parsed
|
148
148
|
"""
|
149
149
|
|
150
|
-
def
|
150
|
+
def get_cite_key_by_title(self, title: str) -> Optional[str]:
|
151
151
|
"""Find citation key by exact title match.
|
152
152
|
|
153
153
|
Args:
|
@@ -156,6 +156,15 @@ class BibManager:
|
|
156
156
|
Returns:
|
157
157
|
Citation key if exact match found, None otherwise
|
158
158
|
"""
|
159
|
+
def get_cite_key_by_title_fuzzy(self, title: str) -> Optional[str]:
|
160
|
+
"""Find citation key by fuzzy title match.
|
161
|
+
|
162
|
+
Args:
|
163
|
+
title: Search term to find in bibliography entries
|
164
|
+
|
165
|
+
Returns:
|
166
|
+
Citation key of best matching entry, or None if no good match
|
167
|
+
"""
|
159
168
|
|
160
169
|
def get_cite_key_fuzzy(self, query: str) -> Optional[str]:
|
161
170
|
"""Find best matching citation using fuzzy text search.
|
fabricatio/utils.py
CHANGED
@@ -25,7 +25,7 @@ async def ask_edit(
|
|
25
25
|
return res
|
26
26
|
|
27
27
|
|
28
|
-
def override_kwargs(kwargs: Mapping[str,Any], **overrides) -> Dict[str, Any]:
|
28
|
+
def override_kwargs(kwargs: Mapping[str, Any], **overrides) -> Dict[str, Any]:
|
29
29
|
"""Override the values in kwargs with the provided overrides."""
|
30
30
|
new_kwargs = dict(kwargs.items())
|
31
31
|
new_kwargs.update({k: v for k, v in overrides.items() if v is not None})
|
@@ -52,3 +52,16 @@ def ok[T](val: Optional[T], msg: str = "Value is None") -> T:
|
|
52
52
|
if val is None:
|
53
53
|
raise ValueError(msg)
|
54
54
|
return val
|
55
|
+
|
56
|
+
|
57
|
+
def wrapp_in_block(string: str, title: str) -> str:
|
58
|
+
"""Wraps a string in a block with a title.
|
59
|
+
|
60
|
+
Args:
|
61
|
+
string: The string to wrap.
|
62
|
+
title: The title of the block.
|
63
|
+
|
64
|
+
Returns:
|
65
|
+
str: The wrapped string.
|
66
|
+
"""
|
67
|
+
return f"--- Start of {title} ---\n{string}\n--- End of {title} ---"
|
Binary file
|
{fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.10.dev1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: fabricatio
|
3
|
-
Version: 0.2.10.dev0
|
3
|
+
Version: 0.2.10.dev1
|
4
4
|
Classifier: License :: OSI Approved :: MIT License
|
5
5
|
Classifier: Programming Language :: Rust
|
6
6
|
Classifier: Programming Language :: Python :: 3.12
|
@@ -23,7 +23,6 @@ Requires-Dist: pymitter>=1.0.0
|
|
23
23
|
Requires-Dist: questionary>=2.1.0
|
24
24
|
Requires-Dist: regex>=2024.11.6
|
25
25
|
Requires-Dist: rich>=13.9.4
|
26
|
-
Requires-Dist: rtoml>=0.12.0
|
27
26
|
Requires-Dist: pymilvus>=2.5.4 ; extra == 'rag'
|
28
27
|
Requires-Dist: fabricatio[calc,plot,rag] ; extra == 'full'
|
29
28
|
Requires-Dist: sympy>=1.13.3 ; extra == 'calc'
|
@@ -45,8 +44,6 @@ Project-URL: Issues, https://github.com/Whth/fabricatio/issues
|
|
45
44
|
# Fabricatio
|
46
45
|
|
47
46
|

|
48
|
-

|
49
|
-

|
50
47
|
|
51
48
|
## Overview
|
52
49
|
|
{fabricatio-0.2.10.dev0.dist-info → fabricatio-0.2.10.dev1.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
fabricatio-0.2.10.
|
2
|
-
fabricatio-0.2.10.
|
3
|
-
fabricatio-0.2.10.
|
4
|
-
fabricatio/actions/article.py,sha256=
|
5
|
-
fabricatio/actions/article_rag.py,sha256=
|
1
|
+
fabricatio-0.2.10.dev1.dist-info/METADATA,sha256=HRPFnRmPH19wYpcE1dJoL6Kltg2vewsF432CMSqV-Yg,5118
|
2
|
+
fabricatio-0.2.10.dev1.dist-info/WHEEL,sha256=jABKVkLC9kJr8mi_er5jOqpiQUjARSLXDUIIxDqsS50,96
|
3
|
+
fabricatio-0.2.10.dev1.dist-info/licenses/LICENSE,sha256=do7J7EiCGbq0QPbMAL_FqLYufXpHnCnXBOuqVPwSV8Y,1088
|
4
|
+
fabricatio/actions/article.py,sha256=0PE-b47WvBQpa4XPwc4sMe11GY8KO71N4pui_Yrnz_I,8993
|
5
|
+
fabricatio/actions/article_rag.py,sha256=79466dKS1TaT2rw5gadM1WfZoRJy07LmtoMXvfCZ2-U,5952
|
6
6
|
fabricatio/actions/output.py,sha256=gkC2u_VpMJ6jOnbyRAJN24UVK7iDAMzhItYukaW8Spk,6498
|
7
|
-
fabricatio/actions/rag.py,sha256=
|
7
|
+
fabricatio/actions/rag.py,sha256=9fM4oR5B4AJNhKmWfUlNIeF4QkUntQscICNVo_zWPSA,3580
|
8
8
|
fabricatio/actions/rules.py,sha256=SNvAvQx4xUare16Za_dEpYlYI_PJNnbiO-E0XDa5JT4,2857
|
9
9
|
fabricatio/actions/__init__.py,sha256=wVENCFtpVb1rLFxoOFJt9-8smLWXuJV7IwA8P3EfFz4,48
|
10
10
|
fabricatio/capabilities/advanced_judge.py,sha256=selB0Gwf1F4gGJlwBiRo6gI4KOUROgh3WnzO3mZFEls,706
|
@@ -12,7 +12,7 @@ fabricatio/capabilities/censor.py,sha256=bBT5qy-kp7fh8g4Lz3labSwxwJ60gGd_vrkc6k1
|
|
12
12
|
fabricatio/capabilities/check.py,sha256=kYqzohhv2bZfl1aKSUt7a8snT8YEl2zgha_ZdAdMMfQ,8622
|
13
13
|
fabricatio/capabilities/correct.py,sha256=W_cInqlciNEhyMK0YI53jk4EvW9uAdge90IO9OElUmA,10420
|
14
14
|
fabricatio/capabilities/propose.py,sha256=hkBeSlmcTdfYWT-ph6nlbtHXBozi_JXqXlWcnBy3W78,2007
|
15
|
-
fabricatio/capabilities/rag.py,sha256=
|
15
|
+
fabricatio/capabilities/rag.py,sha256=kqcunWBC6oA4P1rzIG2Xu9zqSg73H3uKPF41JJQ1HVI,9595
|
16
16
|
fabricatio/capabilities/rating.py,sha256=Wt_H5fA1H4XuZGIMI8pr0cp_6jnXJABlo8lfU_4Fp5A,17645
|
17
17
|
fabricatio/capabilities/review.py,sha256=-EMZe0ADFPT6fPGmra16UPjJC1M3rAs6dPFdTZ88Fgg,5060
|
18
18
|
fabricatio/capabilities/task.py,sha256=JahC61X233UIPsjovxJgc_yqj_BjWZJBCzJZq11M2Xk,4417
|
@@ -26,37 +26,38 @@ fabricatio/fs/readers.py,sha256=M5kojKWsJQMQpE4CBbYvas0JKmPaiaYSfWmiqJx1SP4,1884
|
|
26
26
|
fabricatio/fs/__init__.py,sha256=PCf0s_9KDjVfNw7AfPoJzGt3jMq4gJOfbcT4pb0D0ZY,588
|
27
27
|
fabricatio/journal.py,sha256=stnEP88aUBA_GmU9gfTF2EZI8FS2OyMLGaMSTgK4QgA,476
|
28
28
|
fabricatio/models/action.py,sha256=Kfa-zojgHQ1vPoC2lQp-thTTp0oySKn7k6I4ea6iYTs,9837
|
29
|
-
fabricatio/models/adv_kwargs_types.py,sha256=
|
29
|
+
fabricatio/models/adv_kwargs_types.py,sha256=kUO-SiZtFuz5cZCmMLnJJ9tjQ4-Zd_foo6R8HQMlM5A,1950
|
30
30
|
fabricatio/models/events.py,sha256=wiirk_ASg3iXDOZU_gIimci1VZVzWE1nDmxy-hQVJ9M,4150
|
31
31
|
fabricatio/models/extra/advanced_judge.py,sha256=INUl_41C8jkausDekkjnEmTwNfLCJ23TwFjq2cM23Cw,1092
|
32
|
+
fabricatio/models/extra/aricle_rag.py,sha256=I65Dcip3iibQdkACPF-EgYv7bSlpXB9oj8eq-R-Tjdc,4681
|
32
33
|
fabricatio/models/extra/article_base.py,sha256=DxBex4UsMAFmHmriwXkcvGIuU-WTSD4ZfzDEk-no9TA,11894
|
33
|
-
fabricatio/models/extra/article_essence.py,sha256=
|
34
|
+
fabricatio/models/extra/article_essence.py,sha256=mlIkkRMR3I1RtqiiOnmIE3Vy623L4eECumkRzryE1pw,2749
|
34
35
|
fabricatio/models/extra/article_main.py,sha256=zGzcf51abcWwiaX6iyi2V7upBLa-DBovnpTJj-qYLeA,7878
|
35
36
|
fabricatio/models/extra/article_outline.py,sha256=w7O0SHgC7exbptWVbR62FMHAueMgBpyWKVYMGGl_oj8,1427
|
36
37
|
fabricatio/models/extra/article_proposal.py,sha256=NbyjW-7UiFPtnVD9nte75re4xL2pD4qL29PpNV4Cg_M,1870
|
37
38
|
fabricatio/models/extra/patches.py,sha256=_WNCxtYzzsVfUxI16vu4IqsLahLYRHdbQN9er9tqhC0,997
|
38
39
|
fabricatio/models/extra/problem.py,sha256=zZEnjBW2XGRVpJpUp09f1J_w5A1zU-LhxX78AVCq9ts,7113
|
39
|
-
fabricatio/models/extra/rag.py,sha256=
|
40
|
+
fabricatio/models/extra/rag.py,sha256=RMi8vhEPB0I5mVmjRLRLxYHUnm9pFhvVwysaIwmW2s0,3955
|
40
41
|
fabricatio/models/extra/rule.py,sha256=KQQELVhCLUXhEZ35jU3WGYqKHuCYEAkn0p6pxAE-hOU,2625
|
41
42
|
fabricatio/models/extra/__init__.py,sha256=XlYnS_2B9nhLhtQkjE7rvvfPmAAtXVdNi9bSDAR-Ge8,54
|
42
|
-
fabricatio/models/generic.py,sha256=
|
43
|
-
fabricatio/models/kwargs_types.py,sha256=
|
43
|
+
fabricatio/models/generic.py,sha256=M6K4uMSy4zKoTX5LyZFB8vXw8dTR9nZqec84eE-vPfw,30643
|
44
|
+
fabricatio/models/kwargs_types.py,sha256=r0fgI4ExuAc0MMsgWs8fAyaQ9Z_PRRAKTr53pPP5JYY,4747
|
44
45
|
fabricatio/models/role.py,sha256=-CRcj5_M3_ciLPzwiNn92grBmwoSLQ-n4koVZiCNTBM,2953
|
45
46
|
fabricatio/models/task.py,sha256=SxWI-b5jlQcGmNsjQ2aKDyywXwGiUvCR1rgUhk-pli8,10503
|
46
47
|
fabricatio/models/tool.py,sha256=jQ51g4lwTPfsMF1nbreDJtBczbxIHoXcPuLSOqHliq8,12506
|
47
48
|
fabricatio/models/usages.py,sha256=VLBpNs7zfNPqROvI2IXlqsoqKYSW8L6usNwZ1HXZVOY,34339
|
48
49
|
fabricatio/parser.py,sha256=qN2godNsArmb90btOMxgqlol57166DyYsV2JlU8DlHs,6532
|
49
50
|
fabricatio/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
|
-
fabricatio/rust.pyi,sha256=
|
51
|
+
fabricatio/rust.pyi,sha256=uVHcjDkG4gPcWX_7pxJXHroamY6Db46tQci96THbwJs,7280
|
51
52
|
fabricatio/rust_instances.py,sha256=Byeo8KHW_dJiXujJq7YPGDLBX5bHNDYbBc4sY3uubVY,313
|
52
53
|
fabricatio/toolboxes/arithmetic.py,sha256=WLqhY-Pikv11Y_0SGajwZx3WhsLNpHKf9drzAqOf_nY,1369
|
53
54
|
fabricatio/toolboxes/fs.py,sha256=l4L1CVxJmjw9Ld2XUpIlWfV0_Fu_2Og6d3E13I-S4aE,736
|
54
55
|
fabricatio/toolboxes/__init__.py,sha256=KBJi5OG_pExscdlM7Bnt_UF43j4I3Lv6G71kPVu4KQU,395
|
55
|
-
fabricatio/utils.py,sha256=
|
56
|
+
fabricatio/utils.py,sha256=PKb2yfAe7iRwGJklLB5uZWuWhT0Tm47iHAqPo-zl5CQ,2039
|
56
57
|
fabricatio/workflows/articles.py,sha256=ObYTFUqLUk_CzdmmnX6S7APfxcGmPFqnFr9pdjU7Z4Y,969
|
57
58
|
fabricatio/workflows/rag.py,sha256=-YYp2tlE9Vtfgpg6ROpu6QVO8j8yVSPa6yDzlN3qVxs,520
|
58
59
|
fabricatio/workflows/__init__.py,sha256=5ScFSTA-bvhCesj3U9Mnmi6Law6N1fmh5UKyh58L3u8,51
|
59
60
|
fabricatio/__init__.py,sha256=Rmvq2VgdS2u68vnOi2i5RbeWbAwrJDbk8D8D883PJWE,1022
|
60
|
-
fabricatio/rust.cp312-win_amd64.pyd,sha256=
|
61
|
-
fabricatio-0.2.10.
|
62
|
-
fabricatio-0.2.10.
|
61
|
+
fabricatio/rust.cp312-win_amd64.pyd,sha256=ql93jn1qacym6Ks927dxEGJb16rUyWPiW85fm9IE8A0,2251776
|
62
|
+
fabricatio-0.2.10.dev1.data/scripts/tdown.exe,sha256=WFQ7z3utWNkccmrzZPzJTb4N0_IBWrjirdWSOKcrj_0,3365888
|
63
|
+
fabricatio-0.2.10.dev1.dist-info/RECORD,,
|
File without changes
|
File without changes
|