fabricatio 0.3.15.dev5__cp312-cp312-win_amd64.whl → 0.4.5.dev0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/__init__.py +7 -8
- fabricatio/actions/__init__.py +69 -1
- fabricatio/capabilities/__init__.py +63 -1
- fabricatio/models/__init__.py +51 -0
- fabricatio/rust.cp312-win_amd64.pyd +0 -0
- fabricatio/toolboxes/__init__.py +2 -1
- fabricatio/toolboxes/arithmetic.py +1 -1
- fabricatio/toolboxes/fs.py +2 -2
- fabricatio/workflows/__init__.py +9 -0
- fabricatio-0.4.5.dev0.data/scripts/tdown.exe +0 -0
- {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.5.dev0.dist-info}/METADATA +58 -27
- fabricatio-0.4.5.dev0.dist-info/RECORD +15 -0
- fabricatio/actions/article.py +0 -415
- fabricatio/actions/article_rag.py +0 -407
- fabricatio/actions/fs.py +0 -25
- fabricatio/actions/output.py +0 -247
- fabricatio/actions/rag.py +0 -96
- fabricatio/actions/rules.py +0 -83
- fabricatio/capabilities/advanced_judge.py +0 -20
- fabricatio/capabilities/advanced_rag.py +0 -61
- fabricatio/capabilities/censor.py +0 -105
- fabricatio/capabilities/check.py +0 -212
- fabricatio/capabilities/correct.py +0 -228
- fabricatio/capabilities/extract.py +0 -74
- fabricatio/capabilities/propose.py +0 -65
- fabricatio/capabilities/rag.py +0 -264
- fabricatio/capabilities/rating.py +0 -404
- fabricatio/capabilities/review.py +0 -114
- fabricatio/capabilities/task.py +0 -113
- fabricatio/decorators.py +0 -253
- fabricatio/emitter.py +0 -177
- fabricatio/fs/__init__.py +0 -35
- fabricatio/fs/curd.py +0 -153
- fabricatio/fs/readers.py +0 -61
- fabricatio/journal.py +0 -12
- fabricatio/models/action.py +0 -263
- fabricatio/models/adv_kwargs_types.py +0 -63
- fabricatio/models/extra/__init__.py +0 -1
- fabricatio/models/extra/advanced_judge.py +0 -32
- fabricatio/models/extra/aricle_rag.py +0 -286
- fabricatio/models/extra/article_base.py +0 -488
- fabricatio/models/extra/article_essence.py +0 -98
- fabricatio/models/extra/article_main.py +0 -285
- fabricatio/models/extra/article_outline.py +0 -45
- fabricatio/models/extra/article_proposal.py +0 -52
- fabricatio/models/extra/patches.py +0 -20
- fabricatio/models/extra/problem.py +0 -165
- fabricatio/models/extra/rag.py +0 -98
- fabricatio/models/extra/rule.py +0 -51
- fabricatio/models/generic.py +0 -904
- fabricatio/models/kwargs_types.py +0 -121
- fabricatio/models/role.py +0 -156
- fabricatio/models/task.py +0 -310
- fabricatio/models/tool.py +0 -328
- fabricatio/models/usages.py +0 -791
- fabricatio/parser.py +0 -114
- fabricatio/rust.pyi +0 -846
- fabricatio/utils.py +0 -156
- fabricatio/workflows/articles.py +0 -24
- fabricatio/workflows/rag.py +0 -11
- fabricatio-0.3.15.dev5.data/scripts/tdown.exe +0 -0
- fabricatio-0.3.15.dev5.data/scripts/ttm.exe +0 -0
- fabricatio-0.3.15.dev5.dist-info/RECORD +0 -63
- {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.5.dev0.dist-info}/WHEEL +0 -0
- {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.5.dev0.dist-info}/licenses/LICENSE +0 -0
@@ -1,228 +0,0 @@
|
|
1
|
-
"""A module containing the Correct capability for reviewing, validating, and improving objects."""
|
2
|
-
|
3
|
-
from abc import ABC
|
4
|
-
from asyncio import gather
|
5
|
-
from typing import Optional, Type, Unpack, cast
|
6
|
-
|
7
|
-
from fabricatio.capabilities.propose import Propose
|
8
|
-
from fabricatio.capabilities.rating import Rating
|
9
|
-
from fabricatio.journal import logger
|
10
|
-
from fabricatio.models.adv_kwargs_types import CorrectKwargs
|
11
|
-
from fabricatio.models.extra.problem import Improvement, ProblemSolutions
|
12
|
-
from fabricatio.models.generic import ProposedUpdateAble, SketchedAble
|
13
|
-
from fabricatio.models.kwargs_types import (
|
14
|
-
BestKwargs,
|
15
|
-
ValidateKwargs,
|
16
|
-
)
|
17
|
-
from fabricatio.rust import CONFIG, TEMPLATE_MANAGER
|
18
|
-
from fabricatio.utils import fallback_kwargs, ok, override_kwargs
|
19
|
-
|
20
|
-
|
21
|
-
class Correct(Rating, Propose, ABC):
|
22
|
-
"""A class that provides the capability to correct objects."""
|
23
|
-
|
24
|
-
async def decide_solution(
|
25
|
-
self, problem_solutions: ProblemSolutions, **kwargs: Unpack[BestKwargs]
|
26
|
-
) -> ProblemSolutions:
|
27
|
-
"""Decide the best solution from a list of problem solutions.
|
28
|
-
|
29
|
-
Args:
|
30
|
-
problem_solutions (ProblemSolutions): The problem solutions to evaluate.
|
31
|
-
**kwargs (Unpack[BestKwargs]): Additional keyword arguments for the decision process.
|
32
|
-
|
33
|
-
Returns:
|
34
|
-
ProblemSolutions: The problem solutions with the best solution selected.
|
35
|
-
"""
|
36
|
-
if (leng := len(problem_solutions.solutions)) == 0:
|
37
|
-
logger.error(f"No solutions found in ProblemSolutions, Skip: `{problem_solutions.problem.name}`")
|
38
|
-
if leng > 1:
|
39
|
-
logger.info(f"{leng} solutions found in Problem `{problem_solutions.problem.name}`, select the best.")
|
40
|
-
problem_solutions.solutions = await self.best(problem_solutions.solutions, **kwargs)
|
41
|
-
return problem_solutions
|
42
|
-
|
43
|
-
async def decide_improvement(self, improvement: Improvement, **kwargs: Unpack[BestKwargs]) -> Improvement:
|
44
|
-
"""Decide the best solution for each problem solution in an improvement.
|
45
|
-
|
46
|
-
Args:
|
47
|
-
improvement (Improvement): The improvement containing problem solutions to evaluate.
|
48
|
-
**kwargs (Unpack[BestKwargs]): Additional keyword arguments for the decision process.
|
49
|
-
|
50
|
-
Returns:
|
51
|
-
Improvement: The improvement with the best solutions selected for each problem solution.
|
52
|
-
"""
|
53
|
-
if leng := len(improvement.problem_solutions):
|
54
|
-
logger.debug(f"{leng} problem_solutions found in Improvement, decide solution for each of them.")
|
55
|
-
await gather(
|
56
|
-
*[
|
57
|
-
self.decide_solution(
|
58
|
-
ps,
|
59
|
-
**fallback_kwargs(
|
60
|
-
kwargs, topic=f"which solution is better to deal this problem {ps.problem.description}\n\n"
|
61
|
-
),
|
62
|
-
)
|
63
|
-
for ps in improvement.problem_solutions
|
64
|
-
],
|
65
|
-
)
|
66
|
-
if any(not (violated := ps).decided() for ps in improvement.problem_solutions):
|
67
|
-
logger.error(f"Some problem_solutions are not decided: {violated}")
|
68
|
-
else:
|
69
|
-
logger.success(f"All problem_solutions are decided '{improvement.focused_on}'")
|
70
|
-
else:
|
71
|
-
logger.error(f"No problem_solutions found in Improvement, Skip: {improvement}")
|
72
|
-
return improvement
|
73
|
-
|
74
|
-
async def fix_troubled_obj[M: SketchedAble](
|
75
|
-
self,
|
76
|
-
obj: M,
|
77
|
-
problem_solutions: ProblemSolutions,
|
78
|
-
reference: str = "",
|
79
|
-
**kwargs: Unpack[ValidateKwargs[M]],
|
80
|
-
) -> Optional[M]:
|
81
|
-
"""Fix a troubled object based on problem solutions.
|
82
|
-
|
83
|
-
Args:
|
84
|
-
obj (M): The object to be fixed.
|
85
|
-
problem_solutions (ProblemSolutions): The problem solutions to apply.
|
86
|
-
reference (str): A reference or contextual information for the object.
|
87
|
-
**kwargs (Unpack[ValidateKwargs[M]]): Additional keyword arguments for the validation process.
|
88
|
-
|
89
|
-
Returns:
|
90
|
-
Optional[M]: The fixed object, or None if fixing fails.
|
91
|
-
"""
|
92
|
-
return await self.propose(
|
93
|
-
cast("Type[M]", obj.__class__),
|
94
|
-
TEMPLATE_MANAGER.render_template(
|
95
|
-
CONFIG.templates.fix_troubled_obj_template,
|
96
|
-
{
|
97
|
-
"problem": problem_solutions.problem.display(),
|
98
|
-
"solution": ok(
|
99
|
-
problem_solutions.final_solution(),
|
100
|
-
f"{len(problem_solutions.solutions)} solution Found for `{problem_solutions.problem.name}`.",
|
101
|
-
).display(),
|
102
|
-
"reference": reference,
|
103
|
-
},
|
104
|
-
),
|
105
|
-
**kwargs,
|
106
|
-
)
|
107
|
-
|
108
|
-
async def fix_troubled_string(
|
109
|
-
self,
|
110
|
-
input_text: str,
|
111
|
-
problem_solutions: ProblemSolutions,
|
112
|
-
reference: str = "",
|
113
|
-
**kwargs: Unpack[ValidateKwargs[str]],
|
114
|
-
) -> Optional[str]:
|
115
|
-
"""Fix a troubled string based on problem solutions.
|
116
|
-
|
117
|
-
Args:
|
118
|
-
input_text (str): The string to be fixed.
|
119
|
-
problem_solutions (ProblemSolutions): The problem solutions to apply.
|
120
|
-
reference (str): A reference or contextual information for the string.
|
121
|
-
**kwargs (Unpack[ValidateKwargs[str]]): Additional keyword arguments for the validation process.
|
122
|
-
|
123
|
-
Returns:
|
124
|
-
Optional[str]: The fixed string, or None if fixing fails.
|
125
|
-
"""
|
126
|
-
return await self.ageneric_string(
|
127
|
-
TEMPLATE_MANAGER.render_template(
|
128
|
-
CONFIG.templates.fix_troubled_string_template,
|
129
|
-
{
|
130
|
-
"problem": problem_solutions.problem.display(),
|
131
|
-
"solution": ok(
|
132
|
-
problem_solutions.final_solution(),
|
133
|
-
f"No solution found for problem: {problem_solutions.problem}",
|
134
|
-
).display(),
|
135
|
-
"reference": reference,
|
136
|
-
"string_to_fix": input_text,
|
137
|
-
},
|
138
|
-
),
|
139
|
-
**kwargs,
|
140
|
-
)
|
141
|
-
|
142
|
-
async def correct_obj[M: SketchedAble](
|
143
|
-
self,
|
144
|
-
obj: M,
|
145
|
-
improvement: Improvement,
|
146
|
-
reference: str = "",
|
147
|
-
**kwargs: Unpack[ValidateKwargs[M]],
|
148
|
-
) -> Optional[M]:
|
149
|
-
"""Review and correct an object based on defined criteria and templates.
|
150
|
-
|
151
|
-
This method first conducts a review of the given object, then uses the review results
|
152
|
-
to generate a corrected version of the object using appropriate templates.
|
153
|
-
|
154
|
-
Args:
|
155
|
-
obj (M): The object to be reviewed and corrected. Must implement ProposedAble.
|
156
|
-
improvement (Improvement): The improvement object containing the review results.
|
157
|
-
reference (str): A reference or contextual information for the object.
|
158
|
-
**kwargs (Unpack[ValidateKwargs[M]]): Review configuration parameters including criteria and review options.
|
159
|
-
|
160
|
-
Returns:
|
161
|
-
Optional[M]: A corrected version of the input object, or None if correction fails.
|
162
|
-
|
163
|
-
Raises:
|
164
|
-
TypeError: If the provided object doesn't implement Display or WithBriefing interfaces.
|
165
|
-
"""
|
166
|
-
if not improvement.decided():
|
167
|
-
logger.info(f"Improvement {improvement.focused_on} not decided, start deciding...")
|
168
|
-
improvement = await self.decide_improvement(improvement, **override_kwargs(kwargs, default=None))
|
169
|
-
|
170
|
-
total = len(improvement.problem_solutions)
|
171
|
-
for idx, ps in enumerate(improvement.problem_solutions):
|
172
|
-
logger.info(f"[{idx + 1}/{total}] Fixing {obj.__class__.__name__} for problem `{ps.problem.name}`")
|
173
|
-
fixed_obj = await self.fix_troubled_obj(obj, ps, reference, **kwargs)
|
174
|
-
if fixed_obj is None:
|
175
|
-
logger.error(f"[{idx + 1}/{total}] Failed to fix problem `{ps.problem.name}`")
|
176
|
-
return None
|
177
|
-
obj = fixed_obj
|
178
|
-
return obj
|
179
|
-
|
180
|
-
async def correct_string(
|
181
|
-
self, input_text: str, improvement: Improvement, reference: str = "", **kwargs: Unpack[ValidateKwargs[str]]
|
182
|
-
) -> Optional[str]:
|
183
|
-
"""Review and correct a string based on defined criteria and templates.
|
184
|
-
|
185
|
-
This method first conducts a review of the given string, then uses the review results
|
186
|
-
to generate a corrected version of the string using appropriate templates.
|
187
|
-
|
188
|
-
Args:
|
189
|
-
input_text (str): The string to be reviewed and corrected.
|
190
|
-
improvement (Improvement): The improvement object containing the review results.
|
191
|
-
reference (str): A reference or contextual information for the string.
|
192
|
-
**kwargs (Unpack[ValidateKwargs[str]]): Review configuration parameters including criteria and review options.
|
193
|
-
|
194
|
-
Returns:
|
195
|
-
Optional[str]: A corrected version of the input string, or None if correction fails.
|
196
|
-
"""
|
197
|
-
if not improvement.decided():
|
198
|
-
logger.info(f"Improvement {improvement.focused_on} not decided, start deciding...")
|
199
|
-
|
200
|
-
improvement = await self.decide_improvement(improvement, **override_kwargs(kwargs, default=None))
|
201
|
-
|
202
|
-
for ps in improvement.problem_solutions:
|
203
|
-
fixed_string = await self.fix_troubled_string(input_text, ps, reference, **kwargs)
|
204
|
-
if fixed_string is None:
|
205
|
-
logger.error(
|
206
|
-
f"Failed to fix troubling string when deal with problem: {ps.problem}",
|
207
|
-
)
|
208
|
-
return None
|
209
|
-
input_text = fixed_string
|
210
|
-
return input_text
|
211
|
-
|
212
|
-
async def correct_obj_inplace[M: ProposedUpdateAble](
|
213
|
-
self, obj: M, **kwargs: Unpack[CorrectKwargs[M]]
|
214
|
-
) -> Optional[M]:
|
215
|
-
"""Correct an object in place based on defined criteria and templates.
|
216
|
-
|
217
|
-
Args:
|
218
|
-
obj (M): The object to be corrected.
|
219
|
-
**kwargs (Unpack[CorrectKwargs[M]]): Additional keyword arguments for the correction process.
|
220
|
-
|
221
|
-
Returns:
|
222
|
-
Optional[M]: The corrected object, or None if correction fails.
|
223
|
-
"""
|
224
|
-
corrected_obj = await self.correct_obj(obj, **kwargs)
|
225
|
-
if corrected_obj is None:
|
226
|
-
return corrected_obj
|
227
|
-
obj.update_from(corrected_obj)
|
228
|
-
return obj
|
@@ -1,74 +0,0 @@
|
|
1
|
-
"""A module that provides capabilities for extracting information from a given source into a model."""
|
2
|
-
|
3
|
-
from abc import ABC
|
4
|
-
from typing import List, Optional, Type, Unpack, overload
|
5
|
-
|
6
|
-
from fabricatio import TEMPLATE_MANAGER
|
7
|
-
from fabricatio.capabilities.propose import Propose
|
8
|
-
from fabricatio.models.generic import ProposedAble
|
9
|
-
from fabricatio.models.kwargs_types import ValidateKwargs
|
10
|
-
from fabricatio.rust import CONFIG
|
11
|
-
|
12
|
-
|
13
|
-
class Extract(Propose, ABC):
|
14
|
-
"""A class that extract information from a given source to a model."""
|
15
|
-
|
16
|
-
@overload
|
17
|
-
async def extract[M: ProposedAble](
|
18
|
-
self,
|
19
|
-
cls: Type[M],
|
20
|
-
source: str,
|
21
|
-
extract_requirement: Optional[str] = None,
|
22
|
-
align_language: bool = True,
|
23
|
-
**kwargs: Unpack[ValidateKwargs[M]],
|
24
|
-
) -> M: ...
|
25
|
-
|
26
|
-
@overload
|
27
|
-
async def extract[M: ProposedAble](
|
28
|
-
self,
|
29
|
-
cls: Type[M],
|
30
|
-
source: str,
|
31
|
-
extract_requirement: Optional[str] = None,
|
32
|
-
align_language: bool = True,
|
33
|
-
**kwargs: Unpack[ValidateKwargs[None]],
|
34
|
-
) -> Optional[M]: ...
|
35
|
-
|
36
|
-
@overload
|
37
|
-
async def extract[M: ProposedAble](
|
38
|
-
self,
|
39
|
-
cls: Type[M],
|
40
|
-
source: List[str],
|
41
|
-
extract_requirement: Optional[str] = None,
|
42
|
-
align_language: bool = True,
|
43
|
-
**kwargs: Unpack[ValidateKwargs[M]],
|
44
|
-
) -> List[M]: ...
|
45
|
-
|
46
|
-
@overload
|
47
|
-
async def extract[M: ProposedAble](
|
48
|
-
self,
|
49
|
-
cls: Type[M],
|
50
|
-
source: List[str],
|
51
|
-
extract_requirement: Optional[str] = None,
|
52
|
-
align_language: bool = True,
|
53
|
-
**kwargs: Unpack[ValidateKwargs[None]],
|
54
|
-
) -> List[Optional[M]]: ...
|
55
|
-
|
56
|
-
async def extract[M: ProposedAble](
|
57
|
-
self,
|
58
|
-
cls: Type[M],
|
59
|
-
source: List[str] | str,
|
60
|
-
extract_requirement: Optional[str] = None,
|
61
|
-
align_language: bool = True,
|
62
|
-
**kwargs: Unpack[ValidateKwargs[Optional[M]]],
|
63
|
-
) -> M | List[M] | Optional[M] | List[Optional[M]]:
|
64
|
-
"""Extract information from a given source to a model."""
|
65
|
-
return await self.propose(
|
66
|
-
cls,
|
67
|
-
prompt=TEMPLATE_MANAGER.render_template(
|
68
|
-
CONFIG.templates.extract_template,
|
69
|
-
[{"source": s, "extract_requirement": extract_requirement} for s in source]
|
70
|
-
if isinstance(source, list)
|
71
|
-
else {"source": source, "extract_requirement": extract_requirement, "align_language": align_language},
|
72
|
-
),
|
73
|
-
**kwargs,
|
74
|
-
)
|
@@ -1,65 +0,0 @@
|
|
1
|
-
"""A module for the propose capability of the Fabricatio library."""
|
2
|
-
from abc import ABC
|
3
|
-
from typing import List, Optional, Type, Unpack, overload
|
4
|
-
|
5
|
-
from fabricatio.models.generic import ProposedAble
|
6
|
-
from fabricatio.models.kwargs_types import ValidateKwargs
|
7
|
-
from fabricatio.models.usages import LLMUsage
|
8
|
-
|
9
|
-
|
10
|
-
class Propose(LLMUsage,ABC):
|
11
|
-
"""A class that proposes an Obj based on a prompt."""
|
12
|
-
|
13
|
-
@overload
|
14
|
-
async def propose[M: ProposedAble](
|
15
|
-
self,
|
16
|
-
cls: Type[M],
|
17
|
-
prompt: List[str],
|
18
|
-
**kwargs: Unpack[ValidateKwargs[None]],
|
19
|
-
) -> List[Optional[M]]: ...
|
20
|
-
|
21
|
-
@overload
|
22
|
-
async def propose[M: ProposedAble](
|
23
|
-
self,
|
24
|
-
cls: Type[M],
|
25
|
-
prompt: List[str],
|
26
|
-
**kwargs: Unpack[ValidateKwargs[M]],
|
27
|
-
) -> List[M]: ...
|
28
|
-
|
29
|
-
@overload
|
30
|
-
async def propose[M: ProposedAble](
|
31
|
-
self,
|
32
|
-
cls: Type[M],
|
33
|
-
prompt: str,
|
34
|
-
**kwargs: Unpack[ValidateKwargs[None]],
|
35
|
-
) -> Optional[M]: ...
|
36
|
-
@overload
|
37
|
-
async def propose[M: ProposedAble](
|
38
|
-
self,
|
39
|
-
cls: Type[M],
|
40
|
-
prompt: str,
|
41
|
-
**kwargs: Unpack[ValidateKwargs[M]],
|
42
|
-
) -> M: ...
|
43
|
-
|
44
|
-
async def propose[M: ProposedAble](
|
45
|
-
self,
|
46
|
-
cls: Type[M],
|
47
|
-
prompt: List[str] | str,
|
48
|
-
**kwargs: Unpack[ValidateKwargs[Optional[M]]],
|
49
|
-
) -> Optional[M] | List[Optional[M]] | M | List[M]:
|
50
|
-
"""Asynchronously proposes a task based on a given prompt and parameters.
|
51
|
-
|
52
|
-
Parameters:
|
53
|
-
cls: The class type of the task to be proposed.
|
54
|
-
prompt: The prompt text for proposing a task, which is a string that must be provided.
|
55
|
-
**kwargs: The keyword arguments for the LLM (Large Language Model) usage.
|
56
|
-
|
57
|
-
Returns:
|
58
|
-
A Task object based on the proposal result.
|
59
|
-
"""
|
60
|
-
return await self.aask_validate(
|
61
|
-
question=cls.create_json_prompt(prompt),
|
62
|
-
validator=cls.instantiate_from_string,
|
63
|
-
**kwargs,
|
64
|
-
)
|
65
|
-
|
fabricatio/capabilities/rag.py
DELETED
@@ -1,264 +0,0 @@
|
|
1
|
-
"""A module for the RAG (Retrieval Augmented Generation) model."""
|
2
|
-
|
3
|
-
from abc import ABC
|
4
|
-
|
5
|
-
try:
|
6
|
-
from pymilvus import MilvusClient
|
7
|
-
except ImportError as e:
|
8
|
-
raise RuntimeError(
|
9
|
-
"pymilvus is not installed. Have you installed `fabricatio[rag]` instead of `fabricatio`?"
|
10
|
-
) from e
|
11
|
-
from functools import lru_cache
|
12
|
-
from operator import itemgetter
|
13
|
-
from typing import List, Optional, Self, Type, Unpack
|
14
|
-
|
15
|
-
from more_itertools.recipes import flatten, unique
|
16
|
-
from pydantic import Field, PrivateAttr
|
17
|
-
|
18
|
-
from fabricatio.journal import logger
|
19
|
-
from fabricatio.models.adv_kwargs_types import CollectionConfigKwargs, FetchKwargs
|
20
|
-
from fabricatio.models.extra.rag import MilvusDataBase
|
21
|
-
from fabricatio.models.kwargs_types import ChooseKwargs
|
22
|
-
from fabricatio.models.usages import EmbeddingUsage
|
23
|
-
from fabricatio.rust import CONFIG, TEMPLATE_MANAGER
|
24
|
-
from fabricatio.utils import ok
|
25
|
-
|
26
|
-
|
27
|
-
@lru_cache(maxsize=None)
def create_client(uri: str, token: str = "", timeout: Optional[float] = None) -> MilvusClient:
    """Build a Milvus client for the given connection settings.

    The function is wrapped in an unbounded `lru_cache`, so repeated calls with the same
    arguments return the client created by the first call.

    Args:
        uri (str): Passed to `MilvusClient` as `uri`.
        token (str): Passed to `MilvusClient` as `token`.
        timeout (Optional[float]): Passed to `MilvusClient` as `timeout`.

    Returns:
        MilvusClient: The client for these settings.
    """
    connection_options = {"uri": uri, "token": token, "timeout": timeout}
    return MilvusClient(**connection_options)
|
35
|
-
|
36
|
-
|
37
|
-
class RAG(EmbeddingUsage, ABC):
|
38
|
-
"""A class representing the RAG (Retrieval Augmented Generation) model."""
|
39
|
-
|
40
|
-
target_collection: Optional[str] = Field(default=None)
|
41
|
-
"""The name of the collection being viewed."""
|
42
|
-
|
43
|
-
_client: Optional[MilvusClient] = PrivateAttr(None)
|
44
|
-
"""The Milvus client used for the RAG model."""
|
45
|
-
|
46
|
-
@property
|
47
|
-
def client(self) -> MilvusClient:
|
48
|
-
"""Return the Milvus client."""
|
49
|
-
if self._client is None:
|
50
|
-
raise RuntimeError("Client is not initialized. Have you called `self.init_client()`?")
|
51
|
-
return self._client
|
52
|
-
|
53
|
-
def init_client(
|
54
|
-
self,
|
55
|
-
milvus_uri: Optional[str] = None,
|
56
|
-
milvus_token: Optional[str] = None,
|
57
|
-
milvus_timeout: Optional[float] = None,
|
58
|
-
) -> Self:
|
59
|
-
"""Initialize the Milvus client."""
|
60
|
-
self._client = create_client(
|
61
|
-
uri=milvus_uri or ok(self.milvus_uri or CONFIG.rag.milvus_uri),
|
62
|
-
token=milvus_token
|
63
|
-
or (token.get_secret_value() if (token := (self.milvus_token or CONFIG.rag.milvus_token)) else ""),
|
64
|
-
timeout=milvus_timeout or self.milvus_timeout or CONFIG.rag.milvus_timeout,
|
65
|
-
)
|
66
|
-
return self
|
67
|
-
|
68
|
-
def check_client(self, init: bool = True) -> Self:
|
69
|
-
"""Check if the client is initialized, and if not, initialize it."""
|
70
|
-
if self._client is None and init:
|
71
|
-
return self.init_client()
|
72
|
-
if self._client is None and not init:
|
73
|
-
raise RuntimeError("Client is not initialized. Have you called `self.init_client()`?")
|
74
|
-
return self
|
75
|
-
|
76
|
-
def view(
|
77
|
-
self, collection_name: Optional[str], create: bool = False, **kwargs: Unpack[CollectionConfigKwargs]
|
78
|
-
) -> Self:
|
79
|
-
"""View the specified collection.
|
80
|
-
|
81
|
-
Args:
|
82
|
-
collection_name (str): The name of the collection.
|
83
|
-
create (bool): Whether to create the collection if it does not exist.
|
84
|
-
**kwargs (Unpack[CollectionConfigKwargs]): Additional keyword arguments for collection configuration.
|
85
|
-
"""
|
86
|
-
if create and collection_name and not self.check_client().client.has_collection(collection_name):
|
87
|
-
kwargs["dimension"] = ok(
|
88
|
-
kwargs.get("dimension")
|
89
|
-
or self.milvus_dimensions
|
90
|
-
or CONFIG.rag.milvus_dimensions
|
91
|
-
or self.embedding_dimensions
|
92
|
-
or CONFIG.embedding.dimensions,
|
93
|
-
"`dimension` is not set at any level.",
|
94
|
-
)
|
95
|
-
self.client.create_collection(collection_name, auto_id=True, **kwargs)
|
96
|
-
logger.info(f"Creating collection {collection_name}")
|
97
|
-
|
98
|
-
self.target_collection = collection_name
|
99
|
-
return self
|
100
|
-
|
101
|
-
def quit_viewing(self) -> Self:
|
102
|
-
"""Quit the current view.
|
103
|
-
|
104
|
-
Returns:
|
105
|
-
Self: The current instance, allowing for method chaining.
|
106
|
-
"""
|
107
|
-
return self.view(None)
|
108
|
-
|
109
|
-
@property
|
110
|
-
def safe_target_collection(self) -> str:
|
111
|
-
"""Get the name of the collection being viewed, raise an error if not viewing any collection.
|
112
|
-
|
113
|
-
Returns:
|
114
|
-
str: The name of the collection being viewed.
|
115
|
-
"""
|
116
|
-
return ok(self.target_collection, "No collection is being viewed. Have you called `self.view()`?")
|
117
|
-
|
118
|
-
async def add_document[D: MilvusDataBase](
|
119
|
-
self, data: List[D] | D, collection_name: Optional[str] = None, flush: bool = False
|
120
|
-
) -> Self:
|
121
|
-
"""Adds a document to the specified collection.
|
122
|
-
|
123
|
-
Args:
|
124
|
-
data (Union[Dict[str, Any], MilvusDataBase] | List[Union[Dict[str, Any], MilvusDataBase]]): The data to be added to the collection.
|
125
|
-
collection_name (Optional[str]): The name of the collection. If not provided, the currently viewed collection is used.
|
126
|
-
flush (bool): Whether to flush the collection after insertion.
|
127
|
-
|
128
|
-
Returns:
|
129
|
-
Self: The current instance, allowing for method chaining.
|
130
|
-
"""
|
131
|
-
if isinstance(data, MilvusDataBase):
|
132
|
-
data = [data]
|
133
|
-
|
134
|
-
data_vec = await self.vectorize([d.prepare_vectorization() for d in data])
|
135
|
-
prepared_data = [d.prepare_insertion(vec) for d, vec in zip(data, data_vec, strict=True)]
|
136
|
-
|
137
|
-
c_name = collection_name or self.safe_target_collection
|
138
|
-
self.check_client().client.insert(c_name, prepared_data)
|
139
|
-
|
140
|
-
if flush:
|
141
|
-
logger.debug(f"Flushing collection {c_name}")
|
142
|
-
self.client.flush(c_name)
|
143
|
-
return self
|
144
|
-
|
145
|
-
async def afetch_document[D: MilvusDataBase](
|
146
|
-
self,
|
147
|
-
query: List[str],
|
148
|
-
document_model: Type[D],
|
149
|
-
collection_name: Optional[str] = None,
|
150
|
-
similarity_threshold: float = 0.37,
|
151
|
-
result_per_query: int = 10,
|
152
|
-
tei_endpoint: Optional[str] = None,
|
153
|
-
reranker_threshold: float = 0.7,
|
154
|
-
filter_expr: str = "",
|
155
|
-
) -> List[D]:
|
156
|
-
"""Asynchronously fetches documents from a Milvus database based on input vectors.
|
157
|
-
|
158
|
-
Args:
|
159
|
-
query (List[str]): A list of vectors to search for in the database.
|
160
|
-
document_model (Type[D]): The model class used to convert fetched data into document objects.
|
161
|
-
collection_name (Optional[str]): The name of the collection to search within.
|
162
|
-
If None, the currently viewed collection is used.
|
163
|
-
similarity_threshold (float): The similarity threshold for vector search. Defaults to 0.37.
|
164
|
-
result_per_query (int): The maximum number of results to return per query. Defaults to 10.
|
165
|
-
tei_endpoint (str): the endpoint of the TEI api.
|
166
|
-
reranker_threshold (float): The threshold used to filtered low relativity document.
|
167
|
-
filter_expr (str) : The filter expression used to filter out unwanted documents.
|
168
|
-
|
169
|
-
Returns:
|
170
|
-
List[D]: A list of document objects created from the fetched data.
|
171
|
-
"""
|
172
|
-
# Step 1: Search for vectors
|
173
|
-
search_results = self.check_client().client.search(
|
174
|
-
collection_name or self.safe_target_collection,
|
175
|
-
await self.vectorize(query),
|
176
|
-
search_params={"radius": similarity_threshold},
|
177
|
-
output_fields=list(document_model.model_fields),
|
178
|
-
filter=filter_expr,
|
179
|
-
limit=result_per_query,
|
180
|
-
)
|
181
|
-
if tei_endpoint is not None:
|
182
|
-
from fabricatio.rust import TEIClient
|
183
|
-
|
184
|
-
reranker = TEIClient(base_url=tei_endpoint)
|
185
|
-
|
186
|
-
retrieved_id = set()
|
187
|
-
raw_result = []
|
188
|
-
|
189
|
-
for q, g in zip(query, search_results, strict=True):
|
190
|
-
models = document_model.from_sequence([res["entity"] for res in g if res["id"] not in retrieved_id])
|
191
|
-
logger.debug(f"Retrived {len(g)} raw document, filtered out {len(models)}.")
|
192
|
-
retrieved_id.update(res["id"] for res in g)
|
193
|
-
if not models:
|
194
|
-
continue
|
195
|
-
rank_scores = await reranker.arerank(q, [m.prepare_vectorization() for m in models], truncate=True,
|
196
|
-
truncation_direction="Left")
|
197
|
-
raw_result.extend((models[idx], scr) for (idx, scr) in rank_scores if scr > reranker_threshold)
|
198
|
-
|
199
|
-
raw_result_sorted = sorted(raw_result, key=lambda x: x[1], reverse=True)
|
200
|
-
return [r[0] for r in raw_result_sorted]
|
201
|
-
|
202
|
-
# Step 2: Flatten the search results
|
203
|
-
flattened_results = flatten(search_results)
|
204
|
-
unique_results = unique(flattened_results, key=itemgetter("id"))
|
205
|
-
|
206
|
-
# Step 3: Sort by distance (descending)
|
207
|
-
sorted_results = sorted(unique_results, key=itemgetter("distance"), reverse=True)
|
208
|
-
|
209
|
-
logger.debug(
|
210
|
-
f"Fetched {len(sorted_results)} document,searched similarities: {[t['distance'] for t in sorted_results]}"
|
211
|
-
)
|
212
|
-
# Step 4: Extract the entities
|
213
|
-
resp = [result["entity"] for result in sorted_results]
|
214
|
-
|
215
|
-
return document_model.from_sequence(resp)
|
216
|
-
|
217
|
-
async def aretrieve[D: MilvusDataBase](
|
218
|
-
self,
|
219
|
-
query: List[str] | str,
|
220
|
-
document_model: Type[D],
|
221
|
-
max_accepted: int = 20,
|
222
|
-
**kwargs: Unpack[FetchKwargs],
|
223
|
-
) -> List[D]:
|
224
|
-
"""Retrieve data from the collection.
|
225
|
-
|
226
|
-
Args:
|
227
|
-
query (List[str] | str): The query to be used for retrieval.
|
228
|
-
document_model (Type[D]): The model class used to convert retrieved data into document objects.
|
229
|
-
max_accepted (int): The final limit on the number of results to return.
|
230
|
-
**kwargs (Unpack[FetchKwargs]): Additional keyword arguments for retrieval.
|
231
|
-
|
232
|
-
Returns:
|
233
|
-
List[D]: A list of document objects created from the retrieved data.
|
234
|
-
"""
|
235
|
-
if isinstance(query, str):
|
236
|
-
query = [query]
|
237
|
-
|
238
|
-
return (
|
239
|
-
await self.afetch_document(
|
240
|
-
query=query,
|
241
|
-
document_model=document_model,
|
242
|
-
**kwargs,
|
243
|
-
)
|
244
|
-
)[:max_accepted]
|
245
|
-
|
246
|
-
async def arefined_query(
|
247
|
-
self, question: List[str] | str, **kwargs: Unpack[ChooseKwargs[Optional[List[str]]]]
|
248
|
-
) -> Optional[List[str]]:
|
249
|
-
"""Refines the given question using a template.
|
250
|
-
|
251
|
-
Args:
|
252
|
-
question (List[str] | str): The question to be refined.
|
253
|
-
**kwargs (Unpack[ChooseKwargs]): Additional keyword arguments for the refinement process.
|
254
|
-
|
255
|
-
Returns:
|
256
|
-
List[str]: A list of refined questions.
|
257
|
-
"""
|
258
|
-
return await self.alist_str(
|
259
|
-
TEMPLATE_MANAGER.render_template(
|
260
|
-
CONFIG.templates.refined_query_template,
|
261
|
-
{"question": [question] if isinstance(question, str) else question},
|
262
|
-
),
|
263
|
-
**kwargs,
|
264
|
-
)
|