fabricatio 0.3.15.dev5__cp313-cp313-win_amd64.whl → 0.4.0.dev0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. fabricatio/__init__.py +8 -9
  2. fabricatio/rust.cp313-win_amd64.pyd +0 -0
  3. fabricatio/toolboxes/arithmetic.py +1 -1
  4. fabricatio/toolboxes/fs.py +2 -2
  5. fabricatio/workflows/rag.py +2 -1
  6. fabricatio-0.4.0.dev0.data/scripts/tdown.exe +0 -0
  7. {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.0.dev0.dist-info}/METADATA +22 -16
  8. fabricatio-0.4.0.dev0.dist-info/RECORD +13 -0
  9. fabricatio/actions/__init__.py +0 -1
  10. fabricatio/actions/article.py +0 -415
  11. fabricatio/actions/article_rag.py +0 -407
  12. fabricatio/actions/fs.py +0 -25
  13. fabricatio/actions/output.py +0 -247
  14. fabricatio/actions/rag.py +0 -96
  15. fabricatio/actions/rules.py +0 -83
  16. fabricatio/capabilities/__init__.py +0 -1
  17. fabricatio/capabilities/advanced_judge.py +0 -20
  18. fabricatio/capabilities/advanced_rag.py +0 -61
  19. fabricatio/capabilities/censor.py +0 -105
  20. fabricatio/capabilities/check.py +0 -212
  21. fabricatio/capabilities/correct.py +0 -228
  22. fabricatio/capabilities/extract.py +0 -74
  23. fabricatio/capabilities/propose.py +0 -65
  24. fabricatio/capabilities/rag.py +0 -264
  25. fabricatio/capabilities/rating.py +0 -404
  26. fabricatio/capabilities/review.py +0 -114
  27. fabricatio/capabilities/task.py +0 -113
  28. fabricatio/decorators.py +0 -253
  29. fabricatio/emitter.py +0 -177
  30. fabricatio/fs/__init__.py +0 -35
  31. fabricatio/fs/curd.py +0 -153
  32. fabricatio/fs/readers.py +0 -61
  33. fabricatio/journal.py +0 -12
  34. fabricatio/models/action.py +0 -263
  35. fabricatio/models/adv_kwargs_types.py +0 -63
  36. fabricatio/models/extra/__init__.py +0 -1
  37. fabricatio/models/extra/advanced_judge.py +0 -32
  38. fabricatio/models/extra/aricle_rag.py +0 -286
  39. fabricatio/models/extra/article_base.py +0 -488
  40. fabricatio/models/extra/article_essence.py +0 -98
  41. fabricatio/models/extra/article_main.py +0 -285
  42. fabricatio/models/extra/article_outline.py +0 -45
  43. fabricatio/models/extra/article_proposal.py +0 -52
  44. fabricatio/models/extra/patches.py +0 -20
  45. fabricatio/models/extra/problem.py +0 -165
  46. fabricatio/models/extra/rag.py +0 -98
  47. fabricatio/models/extra/rule.py +0 -51
  48. fabricatio/models/generic.py +0 -904
  49. fabricatio/models/kwargs_types.py +0 -121
  50. fabricatio/models/role.py +0 -156
  51. fabricatio/models/task.py +0 -310
  52. fabricatio/models/tool.py +0 -328
  53. fabricatio/models/usages.py +0 -791
  54. fabricatio/parser.py +0 -114
  55. fabricatio/rust.pyi +0 -846
  56. fabricatio/utils.py +0 -156
  57. fabricatio/workflows/articles.py +0 -24
  58. fabricatio-0.3.15.dev5.data/scripts/tdown.exe +0 -0
  59. fabricatio-0.3.15.dev5.data/scripts/ttm.exe +0 -0
  60. fabricatio-0.3.15.dev5.dist-info/RECORD +0 -63
  61. {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.0.dev0.dist-info}/WHEEL +0 -0
  62. {fabricatio-0.3.15.dev5.dist-info → fabricatio-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0
@@ -1,228 +0,0 @@
1
- """A module containing the Correct capability for reviewing, validating, and improving objects."""
2
-
3
- from abc import ABC
4
- from asyncio import gather
5
- from typing import Optional, Type, Unpack, cast
6
-
7
- from fabricatio.capabilities.propose import Propose
8
- from fabricatio.capabilities.rating import Rating
9
- from fabricatio.journal import logger
10
- from fabricatio.models.adv_kwargs_types import CorrectKwargs
11
- from fabricatio.models.extra.problem import Improvement, ProblemSolutions
12
- from fabricatio.models.generic import ProposedUpdateAble, SketchedAble
13
- from fabricatio.models.kwargs_types import (
14
- BestKwargs,
15
- ValidateKwargs,
16
- )
17
- from fabricatio.rust import CONFIG, TEMPLATE_MANAGER
18
- from fabricatio.utils import fallback_kwargs, ok, override_kwargs
19
-
20
-
21
- class Correct(Rating, Propose, ABC):
22
- """A class that provides the capability to correct objects."""
23
-
24
- async def decide_solution(
25
- self, problem_solutions: ProblemSolutions, **kwargs: Unpack[BestKwargs]
26
- ) -> ProblemSolutions:
27
- """Decide the best solution from a list of problem solutions.
28
-
29
- Args:
30
- problem_solutions (ProblemSolutions): The problem solutions to evaluate.
31
- **kwargs (Unpack[BestKwargs]): Additional keyword arguments for the decision process.
32
-
33
- Returns:
34
- ProblemSolutions: The problem solutions with the best solution selected.
35
- """
36
- if (leng := len(problem_solutions.solutions)) == 0:
37
- logger.error(f"No solutions found in ProblemSolutions, Skip: `{problem_solutions.problem.name}`")
38
- if leng > 1:
39
- logger.info(f"{leng} solutions found in Problem `{problem_solutions.problem.name}`, select the best.")
40
- problem_solutions.solutions = await self.best(problem_solutions.solutions, **kwargs)
41
- return problem_solutions
42
-
43
- async def decide_improvement(self, improvement: Improvement, **kwargs: Unpack[BestKwargs]) -> Improvement:
44
- """Decide the best solution for each problem solution in an improvement.
45
-
46
- Args:
47
- improvement (Improvement): The improvement containing problem solutions to evaluate.
48
- **kwargs (Unpack[BestKwargs]): Additional keyword arguments for the decision process.
49
-
50
- Returns:
51
- Improvement: The improvement with the best solutions selected for each problem solution.
52
- """
53
- if leng := len(improvement.problem_solutions):
54
- logger.debug(f"{leng} problem_solutions found in Improvement, decide solution for each of them.")
55
- await gather(
56
- *[
57
- self.decide_solution(
58
- ps,
59
- **fallback_kwargs(
60
- kwargs, topic=f"which solution is better to deal this problem {ps.problem.description}\n\n"
61
- ),
62
- )
63
- for ps in improvement.problem_solutions
64
- ],
65
- )
66
- if any(not (violated := ps).decided() for ps in improvement.problem_solutions):
67
- logger.error(f"Some problem_solutions are not decided: {violated}")
68
- else:
69
- logger.success(f"All problem_solutions are decided '{improvement.focused_on}'")
70
- else:
71
- logger.error(f"No problem_solutions found in Improvement, Skip: {improvement}")
72
- return improvement
73
-
74
- async def fix_troubled_obj[M: SketchedAble](
75
- self,
76
- obj: M,
77
- problem_solutions: ProblemSolutions,
78
- reference: str = "",
79
- **kwargs: Unpack[ValidateKwargs[M]],
80
- ) -> Optional[M]:
81
- """Fix a troubled object based on problem solutions.
82
-
83
- Args:
84
- obj (M): The object to be fixed.
85
- problem_solutions (ProblemSolutions): The problem solutions to apply.
86
- reference (str): A reference or contextual information for the object.
87
- **kwargs (Unpack[ValidateKwargs[M]]): Additional keyword arguments for the validation process.
88
-
89
- Returns:
90
- Optional[M]: The fixed object, or None if fixing fails.
91
- """
92
- return await self.propose(
93
- cast("Type[M]", obj.__class__),
94
- TEMPLATE_MANAGER.render_template(
95
- CONFIG.templates.fix_troubled_obj_template,
96
- {
97
- "problem": problem_solutions.problem.display(),
98
- "solution": ok(
99
- problem_solutions.final_solution(),
100
- f"{len(problem_solutions.solutions)} solution Found for `{problem_solutions.problem.name}`.",
101
- ).display(),
102
- "reference": reference,
103
- },
104
- ),
105
- **kwargs,
106
- )
107
-
108
- async def fix_troubled_string(
109
- self,
110
- input_text: str,
111
- problem_solutions: ProblemSolutions,
112
- reference: str = "",
113
- **kwargs: Unpack[ValidateKwargs[str]],
114
- ) -> Optional[str]:
115
- """Fix a troubled string based on problem solutions.
116
-
117
- Args:
118
- input_text (str): The string to be fixed.
119
- problem_solutions (ProblemSolutions): The problem solutions to apply.
120
- reference (str): A reference or contextual information for the string.
121
- **kwargs (Unpack[ValidateKwargs[str]]): Additional keyword arguments for the validation process.
122
-
123
- Returns:
124
- Optional[str]: The fixed string, or None if fixing fails.
125
- """
126
- return await self.ageneric_string(
127
- TEMPLATE_MANAGER.render_template(
128
- CONFIG.templates.fix_troubled_string_template,
129
- {
130
- "problem": problem_solutions.problem.display(),
131
- "solution": ok(
132
- problem_solutions.final_solution(),
133
- f"No solution found for problem: {problem_solutions.problem}",
134
- ).display(),
135
- "reference": reference,
136
- "string_to_fix": input_text,
137
- },
138
- ),
139
- **kwargs,
140
- )
141
-
142
- async def correct_obj[M: SketchedAble](
143
- self,
144
- obj: M,
145
- improvement: Improvement,
146
- reference: str = "",
147
- **kwargs: Unpack[ValidateKwargs[M]],
148
- ) -> Optional[M]:
149
- """Review and correct an object based on defined criteria and templates.
150
-
151
- This method first conducts a review of the given object, then uses the review results
152
- to generate a corrected version of the object using appropriate templates.
153
-
154
- Args:
155
- obj (M): The object to be reviewed and corrected. Must implement ProposedAble.
156
- improvement (Improvement): The improvement object containing the review results.
157
- reference (str): A reference or contextual information for the object.
158
- **kwargs (Unpack[ValidateKwargs[M]]): Review configuration parameters including criteria and review options.
159
-
160
- Returns:
161
- Optional[M]: A corrected version of the input object, or None if correction fails.
162
-
163
- Raises:
164
- TypeError: If the provided object doesn't implement Display or WithBriefing interfaces.
165
- """
166
- if not improvement.decided():
167
- logger.info(f"Improvement {improvement.focused_on} not decided, start deciding...")
168
- improvement = await self.decide_improvement(improvement, **override_kwargs(kwargs, default=None))
169
-
170
- total = len(improvement.problem_solutions)
171
- for idx, ps in enumerate(improvement.problem_solutions):
172
- logger.info(f"[{idx + 1}/{total}] Fixing {obj.__class__.__name__} for problem `{ps.problem.name}`")
173
- fixed_obj = await self.fix_troubled_obj(obj, ps, reference, **kwargs)
174
- if fixed_obj is None:
175
- logger.error(f"[{idx + 1}/{total}] Failed to fix problem `{ps.problem.name}`")
176
- return None
177
- obj = fixed_obj
178
- return obj
179
-
180
- async def correct_string(
181
- self, input_text: str, improvement: Improvement, reference: str = "", **kwargs: Unpack[ValidateKwargs[str]]
182
- ) -> Optional[str]:
183
- """Review and correct a string based on defined criteria and templates.
184
-
185
- This method first conducts a review of the given string, then uses the review results
186
- to generate a corrected version of the string using appropriate templates.
187
-
188
- Args:
189
- input_text (str): The string to be reviewed and corrected.
190
- improvement (Improvement): The improvement object containing the review results.
191
- reference (str): A reference or contextual information for the string.
192
- **kwargs (Unpack[ValidateKwargs[str]]): Review configuration parameters including criteria and review options.
193
-
194
- Returns:
195
- Optional[str]: A corrected version of the input string, or None if correction fails.
196
- """
197
- if not improvement.decided():
198
- logger.info(f"Improvement {improvement.focused_on} not decided, start deciding...")
199
-
200
- improvement = await self.decide_improvement(improvement, **override_kwargs(kwargs, default=None))
201
-
202
- for ps in improvement.problem_solutions:
203
- fixed_string = await self.fix_troubled_string(input_text, ps, reference, **kwargs)
204
- if fixed_string is None:
205
- logger.error(
206
- f"Failed to fix troubling string when deal with problem: {ps.problem}",
207
- )
208
- return None
209
- input_text = fixed_string
210
- return input_text
211
-
212
- async def correct_obj_inplace[M: ProposedUpdateAble](
213
- self, obj: M, **kwargs: Unpack[CorrectKwargs[M]]
214
- ) -> Optional[M]:
215
- """Correct an object in place based on defined criteria and templates.
216
-
217
- Args:
218
- obj (M): The object to be corrected.
219
- **kwargs (Unpack[CorrectKwargs[M]]): Additional keyword arguments for the correction process.
220
-
221
- Returns:
222
- Optional[M]: The corrected object, or None if correction fails.
223
- """
224
- corrected_obj = await self.correct_obj(obj, **kwargs)
225
- if corrected_obj is None:
226
- return corrected_obj
227
- obj.update_from(corrected_obj)
228
- return obj
@@ -1,74 +0,0 @@
1
- """A module that provides capabilities for extracting information from a given source to a model."""
2
-
3
- from abc import ABC
4
- from typing import List, Optional, Type, Unpack, overload
5
-
6
- from fabricatio import TEMPLATE_MANAGER
7
- from fabricatio.capabilities.propose import Propose
8
- from fabricatio.models.generic import ProposedAble
9
- from fabricatio.models.kwargs_types import ValidateKwargs
10
- from fabricatio.rust import CONFIG
11
-
12
-
13
- class Extract(Propose, ABC):
14
- """A class that extract information from a given source to a model."""
15
-
16
- @overload
17
- async def extract[M: ProposedAble](
18
- self,
19
- cls: Type[M],
20
- source: str,
21
- extract_requirement: Optional[str] = None,
22
- align_language: bool = True,
23
- **kwargs: Unpack[ValidateKwargs[M]],
24
- ) -> M: ...
25
-
26
- @overload
27
- async def extract[M: ProposedAble](
28
- self,
29
- cls: Type[M],
30
- source: str,
31
- extract_requirement: Optional[str] = None,
32
- align_language: bool = True,
33
- **kwargs: Unpack[ValidateKwargs[None]],
34
- ) -> Optional[M]: ...
35
-
36
- @overload
37
- async def extract[M: ProposedAble](
38
- self,
39
- cls: Type[M],
40
- source: List[str],
41
- extract_requirement: Optional[str] = None,
42
- align_language: bool = True,
43
- **kwargs: Unpack[ValidateKwargs[M]],
44
- ) -> List[M]: ...
45
-
46
- @overload
47
- async def extract[M: ProposedAble](
48
- self,
49
- cls: Type[M],
50
- source: List[str],
51
- extract_requirement: Optional[str] = None,
52
- align_language: bool = True,
53
- **kwargs: Unpack[ValidateKwargs[None]],
54
- ) -> List[Optional[M]]: ...
55
-
56
- async def extract[M: ProposedAble](
57
- self,
58
- cls: Type[M],
59
- source: List[str] | str,
60
- extract_requirement: Optional[str] = None,
61
- align_language: bool = True,
62
- **kwargs: Unpack[ValidateKwargs[Optional[M]]],
63
- ) -> M | List[M] | Optional[M] | List[Optional[M]]:
64
- """Extract information from a given source to a model."""
65
- return await self.propose(
66
- cls,
67
- prompt=TEMPLATE_MANAGER.render_template(
68
- CONFIG.templates.extract_template,
69
- [{"source": s, "extract_requirement": extract_requirement} for s in source]
70
- if isinstance(source, list)
71
- else {"source": source, "extract_requirement": extract_requirement, "align_language": align_language},
72
- ),
73
- **kwargs,
74
- )
@@ -1,65 +0,0 @@
1
- """A module for the propose capability of the Fabricatio library."""
2
- from abc import ABC
3
- from typing import List, Optional, Type, Unpack, overload
4
-
5
- from fabricatio.models.generic import ProposedAble
6
- from fabricatio.models.kwargs_types import ValidateKwargs
7
- from fabricatio.models.usages import LLMUsage
8
-
9
-
10
- class Propose(LLMUsage,ABC):
11
- """A class that proposes an Obj based on a prompt."""
12
-
13
- @overload
14
- async def propose[M: ProposedAble](
15
- self,
16
- cls: Type[M],
17
- prompt: List[str],
18
- **kwargs: Unpack[ValidateKwargs[None]],
19
- ) -> List[Optional[M]]: ...
20
-
21
- @overload
22
- async def propose[M: ProposedAble](
23
- self,
24
- cls: Type[M],
25
- prompt: List[str],
26
- **kwargs: Unpack[ValidateKwargs[M]],
27
- ) -> List[M]: ...
28
-
29
- @overload
30
- async def propose[M: ProposedAble](
31
- self,
32
- cls: Type[M],
33
- prompt: str,
34
- **kwargs: Unpack[ValidateKwargs[None]],
35
- ) -> Optional[M]: ...
36
- @overload
37
- async def propose[M: ProposedAble](
38
- self,
39
- cls: Type[M],
40
- prompt: str,
41
- **kwargs: Unpack[ValidateKwargs[M]],
42
- ) -> M: ...
43
-
44
- async def propose[M: ProposedAble](
45
- self,
46
- cls: Type[M],
47
- prompt: List[str] | str,
48
- **kwargs: Unpack[ValidateKwargs[Optional[M]]],
49
- ) -> Optional[M] | List[Optional[M]] | M | List[M]:
50
- """Asynchronously proposes a task based on a given prompt and parameters.
51
-
52
- Parameters:
53
- cls: The class type of the task to be proposed.
54
- prompt: The prompt text for proposing a task, which is a string that must be provided.
55
- **kwargs: The keyword arguments for the LLM (Large Language Model) usage.
56
-
57
- Returns:
58
- A Task object based on the proposal result.
59
- """
60
- return await self.aask_validate(
61
- question=cls.create_json_prompt(prompt),
62
- validator=cls.instantiate_from_string,
63
- **kwargs,
64
- )
65
-
@@ -1,264 +0,0 @@
1
- """A module for the RAG (Retrieval Augmented Generation) model."""
2
-
3
- from abc import ABC
4
-
5
- try:
6
- from pymilvus import MilvusClient
7
- except ImportError as e:
8
- raise RuntimeError(
9
- "pymilvus is not installed. Have you installed `fabricatio[rag]` instead of `fabricatio`?"
10
- ) from e
11
- from functools import lru_cache
12
- from operator import itemgetter
13
- from typing import List, Optional, Self, Type, Unpack
14
-
15
- from more_itertools.recipes import flatten, unique
16
- from pydantic import Field, PrivateAttr
17
-
18
- from fabricatio.journal import logger
19
- from fabricatio.models.adv_kwargs_types import CollectionConfigKwargs, FetchKwargs
20
- from fabricatio.models.extra.rag import MilvusDataBase
21
- from fabricatio.models.kwargs_types import ChooseKwargs
22
- from fabricatio.models.usages import EmbeddingUsage
23
- from fabricatio.rust import CONFIG, TEMPLATE_MANAGER
24
- from fabricatio.utils import ok
25
-
26
-
27
@lru_cache(maxsize=None)
def create_client(uri: str, token: str = "", timeout: Optional[float] = None) -> MilvusClient:
    """Build a Milvus client; results are memoized per (uri, token, timeout) by `lru_cache`.

    Args:
        uri (str): Passed to `MilvusClient` as `uri`.
        token (str): Passed to `MilvusClient` as `token`.
        timeout (Optional[float]): Passed to `MilvusClient` as `timeout`.

    Returns:
        MilvusClient: The client constructed from the given arguments.
    """
    connection = MilvusClient(uri=uri, token=token, timeout=timeout)
    return connection
35
-
36
-
37
- class RAG(EmbeddingUsage, ABC):
38
- """A class representing the RAG (Retrieval Augmented Generation) model."""
39
-
40
- target_collection: Optional[str] = Field(default=None)
41
- """The name of the collection being viewed."""
42
-
43
- _client: Optional[MilvusClient] = PrivateAttr(None)
44
- """The Milvus client used for the RAG model."""
45
-
46
- @property
47
- def client(self) -> MilvusClient:
48
- """Return the Milvus client."""
49
- if self._client is None:
50
- raise RuntimeError("Client is not initialized. Have you called `self.init_client()`?")
51
- return self._client
52
-
53
- def init_client(
54
- self,
55
- milvus_uri: Optional[str] = None,
56
- milvus_token: Optional[str] = None,
57
- milvus_timeout: Optional[float] = None,
58
- ) -> Self:
59
- """Initialize the Milvus client."""
60
- self._client = create_client(
61
- uri=milvus_uri or ok(self.milvus_uri or CONFIG.rag.milvus_uri),
62
- token=milvus_token
63
- or (token.get_secret_value() if (token := (self.milvus_token or CONFIG.rag.milvus_token)) else ""),
64
- timeout=milvus_timeout or self.milvus_timeout or CONFIG.rag.milvus_timeout,
65
- )
66
- return self
67
-
68
- def check_client(self, init: bool = True) -> Self:
69
- """Check if the client is initialized, and if not, initialize it."""
70
- if self._client is None and init:
71
- return self.init_client()
72
- if self._client is None and not init:
73
- raise RuntimeError("Client is not initialized. Have you called `self.init_client()`?")
74
- return self
75
-
76
- def view(
77
- self, collection_name: Optional[str], create: bool = False, **kwargs: Unpack[CollectionConfigKwargs]
78
- ) -> Self:
79
- """View the specified collection.
80
-
81
- Args:
82
- collection_name (str): The name of the collection.
83
- create (bool): Whether to create the collection if it does not exist.
84
- **kwargs (Unpack[CollectionConfigKwargs]): Additional keyword arguments for collection configuration.
85
- """
86
- if create and collection_name and not self.check_client().client.has_collection(collection_name):
87
- kwargs["dimension"] = ok(
88
- kwargs.get("dimension")
89
- or self.milvus_dimensions
90
- or CONFIG.rag.milvus_dimensions
91
- or self.embedding_dimensions
92
- or CONFIG.embedding.dimensions,
93
- "`dimension` is not set at any level.",
94
- )
95
- self.client.create_collection(collection_name, auto_id=True, **kwargs)
96
- logger.info(f"Creating collection {collection_name}")
97
-
98
- self.target_collection = collection_name
99
- return self
100
-
101
- def quit_viewing(self) -> Self:
102
- """Quit the current view.
103
-
104
- Returns:
105
- Self: The current instance, allowing for method chaining.
106
- """
107
- return self.view(None)
108
-
109
- @property
110
- def safe_target_collection(self) -> str:
111
- """Get the name of the collection being viewed, raise an error if not viewing any collection.
112
-
113
- Returns:
114
- str: The name of the collection being viewed.
115
- """
116
- return ok(self.target_collection, "No collection is being viewed. Have you called `self.view()`?")
117
-
118
- async def add_document[D: MilvusDataBase](
119
- self, data: List[D] | D, collection_name: Optional[str] = None, flush: bool = False
120
- ) -> Self:
121
- """Adds a document to the specified collection.
122
-
123
- Args:
124
- data (Union[Dict[str, Any], MilvusDataBase] | List[Union[Dict[str, Any], MilvusDataBase]]): The data to be added to the collection.
125
- collection_name (Optional[str]): The name of the collection. If not provided, the currently viewed collection is used.
126
- flush (bool): Whether to flush the collection after insertion.
127
-
128
- Returns:
129
- Self: The current instance, allowing for method chaining.
130
- """
131
- if isinstance(data, MilvusDataBase):
132
- data = [data]
133
-
134
- data_vec = await self.vectorize([d.prepare_vectorization() for d in data])
135
- prepared_data = [d.prepare_insertion(vec) for d, vec in zip(data, data_vec, strict=True)]
136
-
137
- c_name = collection_name or self.safe_target_collection
138
- self.check_client().client.insert(c_name, prepared_data)
139
-
140
- if flush:
141
- logger.debug(f"Flushing collection {c_name}")
142
- self.client.flush(c_name)
143
- return self
144
-
145
- async def afetch_document[D: MilvusDataBase](
146
- self,
147
- query: List[str],
148
- document_model: Type[D],
149
- collection_name: Optional[str] = None,
150
- similarity_threshold: float = 0.37,
151
- result_per_query: int = 10,
152
- tei_endpoint: Optional[str] = None,
153
- reranker_threshold: float = 0.7,
154
- filter_expr: str = "",
155
- ) -> List[D]:
156
- """Asynchronously fetches documents from a Milvus database based on input vectors.
157
-
158
- Args:
159
- query (List[str]): A list of vectors to search for in the database.
160
- document_model (Type[D]): The model class used to convert fetched data into document objects.
161
- collection_name (Optional[str]): The name of the collection to search within.
162
- If None, the currently viewed collection is used.
163
- similarity_threshold (float): The similarity threshold for vector search. Defaults to 0.37.
164
- result_per_query (int): The maximum number of results to return per query. Defaults to 10.
165
- tei_endpoint (str): the endpoint of the TEI api.
166
- reranker_threshold (float): The threshold used to filtered low relativity document.
167
- filter_expr (str) : The filter expression used to filter out unwanted documents.
168
-
169
- Returns:
170
- List[D]: A list of document objects created from the fetched data.
171
- """
172
- # Step 1: Search for vectors
173
- search_results = self.check_client().client.search(
174
- collection_name or self.safe_target_collection,
175
- await self.vectorize(query),
176
- search_params={"radius": similarity_threshold},
177
- output_fields=list(document_model.model_fields),
178
- filter=filter_expr,
179
- limit=result_per_query,
180
- )
181
- if tei_endpoint is not None:
182
- from fabricatio.rust import TEIClient
183
-
184
- reranker = TEIClient(base_url=tei_endpoint)
185
-
186
- retrieved_id = set()
187
- raw_result = []
188
-
189
- for q, g in zip(query, search_results, strict=True):
190
- models = document_model.from_sequence([res["entity"] for res in g if res["id"] not in retrieved_id])
191
- logger.debug(f"Retrived {len(g)} raw document, filtered out {len(models)}.")
192
- retrieved_id.update(res["id"] for res in g)
193
- if not models:
194
- continue
195
- rank_scores = await reranker.arerank(q, [m.prepare_vectorization() for m in models], truncate=True,
196
- truncation_direction="Left")
197
- raw_result.extend((models[idx], scr) for (idx, scr) in rank_scores if scr > reranker_threshold)
198
-
199
- raw_result_sorted = sorted(raw_result, key=lambda x: x[1], reverse=True)
200
- return [r[0] for r in raw_result_sorted]
201
-
202
- # Step 2: Flatten the search results
203
- flattened_results = flatten(search_results)
204
- unique_results = unique(flattened_results, key=itemgetter("id"))
205
-
206
- # Step 3: Sort by distance (descending)
207
- sorted_results = sorted(unique_results, key=itemgetter("distance"), reverse=True)
208
-
209
- logger.debug(
210
- f"Fetched {len(sorted_results)} document,searched similarities: {[t['distance'] for t in sorted_results]}"
211
- )
212
- # Step 4: Extract the entities
213
- resp = [result["entity"] for result in sorted_results]
214
-
215
- return document_model.from_sequence(resp)
216
-
217
- async def aretrieve[D: MilvusDataBase](
218
- self,
219
- query: List[str] | str,
220
- document_model: Type[D],
221
- max_accepted: int = 20,
222
- **kwargs: Unpack[FetchKwargs],
223
- ) -> List[D]:
224
- """Retrieve data from the collection.
225
-
226
- Args:
227
- query (List[str] | str): The query to be used for retrieval.
228
- document_model (Type[D]): The model class used to convert retrieved data into document objects.
229
- max_accepted (int): The final limit on the number of results to return.
230
- **kwargs (Unpack[FetchKwargs]): Additional keyword arguments for retrieval.
231
-
232
- Returns:
233
- List[D]: A list of document objects created from the retrieved data.
234
- """
235
- if isinstance(query, str):
236
- query = [query]
237
-
238
- return (
239
- await self.afetch_document(
240
- query=query,
241
- document_model=document_model,
242
- **kwargs,
243
- )
244
- )[:max_accepted]
245
-
246
- async def arefined_query(
247
- self, question: List[str] | str, **kwargs: Unpack[ChooseKwargs[Optional[List[str]]]]
248
- ) -> Optional[List[str]]:
249
- """Refines the given question using a template.
250
-
251
- Args:
252
- question (List[str] | str): The question to be refined.
253
- **kwargs (Unpack[ChooseKwargs]): Additional keyword arguments for the refinement process.
254
-
255
- Returns:
256
- List[str]: A list of refined questions.
257
- """
258
- return await self.alist_str(
259
- TEMPLATE_MANAGER.render_template(
260
- CONFIG.templates.refined_query_template,
261
- {"question": [question] if isinstance(question, str) else question},
262
- ),
263
- **kwargs,
264
- )