fabricatio 0.2.3.dev3__cp312-cp312-win_amd64.whl → 0.2.4.dev0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fabricatio/__init__.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from importlib.util import find_spec
4
4
 
5
5
  from fabricatio._rust_instances import template_manager
6
+ from fabricatio.actions import ExtractArticleEssence
6
7
  from fabricatio.core import env
7
8
  from fabricatio.fs import magika
8
9
  from fabricatio.journal import logger
@@ -20,6 +21,7 @@ __all__ = [
20
21
  "Capture",
21
22
  "CodeBlockCapture",
22
23
  "Event",
24
+ "ExtractArticleEssence",
23
25
  "JsonCapture",
24
26
  "Message",
25
27
  "Messages",
@@ -40,6 +42,6 @@ __all__ = [
40
42
 
41
43
 
42
44
  if find_spec("pymilvus"):
43
- from fabricatio.capabilities.rag import Rag
45
+ from fabricatio.capabilities.rag import RAG
44
46
 
45
- __all__ += ["Rag"]
47
+ __all__ += ["RAG"]
Binary file
@@ -1,5 +1,5 @@
1
1
  """module for actions."""
2
2
 
3
- from fabricatio.actions.transmission import PublishTask
3
+ from fabricatio.actions.article import ExtractArticleEssence
4
4
 
5
- __all__ = ["PublishTask"]
5
+ __all__ = ["ExtractArticleEssence"]
@@ -0,0 +1,127 @@
1
+ """Actions for extracting the essence of articles."""
2
+
3
+ from os import PathLike
4
+ from pathlib import Path
5
+ from typing import Callable, List
6
+
7
+ from pydantic import BaseModel, Field
8
+ from pydantic.config import ConfigDict
9
+
10
+ from fabricatio.journal import logger
11
+ from fabricatio.models.action import Action
12
+ from fabricatio.models.generic import ProposedAble
13
+ from fabricatio.models.task import Task
14
+
15
+
16
class Equation(BaseModel):
    """Structured representation of mathematical equations (including their physical or conceptual meanings)."""

    # With `use_attribute_docstrings`, pydantic uses the string literal that
    # follows each field as that field's description, so those strings are
    # part of the model's schema and must not be reworded casually.
    model_config = ConfigDict(use_attribute_docstrings=True)

    # Both fields are required: a bare annotation is equivalent to `Field(...)`.
    description: str
    """A concise explanation of the equation's meaning, purpose, and relevance in the context of the research."""

    latex_code: str
    """The LaTeX code used to represent the equation in a publication-ready format."""
26
+
27
+
28
class Figure(BaseModel):
    """Structured representation of figures (including their academic significance and explanatory captions)."""

    # With `use_attribute_docstrings`, pydantic uses the string literal that
    # follows each field as that field's description, so those strings are
    # part of the model's schema and must not be reworded casually.
    model_config = ConfigDict(use_attribute_docstrings=True)

    # Both fields are required: a bare annotation is equivalent to `Field(...)`.
    description: str
    """A detailed explanation of the figure's content and its role in conveying key insights."""

    figure_caption: str
    """The caption accompanying the figure, summarizing its main points and academic value."""
38
+
39
+
40
class ArticleEssence(ProposedAble):
    """Structured representation of the core elements of an academic paper(providing a comprehensive digital profile of the paper's essential information)."""

    # NOTE(review): the per-field strings below presumably act as schema
    # descriptions (as they do for Equation/Figure); that depends on
    # ProposedAble's model config, which is not visible here - confirm.

    # Basic Metadata
    title: str = Field(...)
    """The full title of the paper, including any subtitles if applicable."""

    authors: List[str] = Field(default_factory=list)
    """A list of the paper's authors, typically in the order of contribution."""

    keywords: List[str] = Field(default_factory=list)
    """A list of keywords that summarize the paper's focus and facilitate indexing."""

    # The default is None, so the annotation must admit None as well;
    # otherwise an explicit `null` in the parsed JSON fails validation even
    # though omitting the key is accepted.
    publication_year: int | None = Field(None)
    """The year in which the paper was published."""

    # Core Content Elements
    domain: List[str] = Field(default_factory=list)
    """The research domains or fields addressed by the paper (e.g., ['Natural Language Processing', 'Computer Vision'])."""

    abstract: str = Field(...)
    """A structured abstract that outlines the research problem, methodology, and conclusions in three distinct sections."""

    core_contributions: List[str] = Field(default_factory=list)
    """Key academic contributions that distinguish the paper from prior work in the field."""

    technical_novelty: List[str] = Field(default_factory=list)
    """Specific technical innovations introduced by the research, listed as individual points."""

    # Academic Achievements Showcase
    highlighted_equations: List[Equation] = Field(default_factory=list)
    """Core mathematical equations that represent breakthroughs in the field, accompanied by explanations of their physical or conceptual significance."""

    highlighted_algorithms: List[str] = Field(default_factory=list)
    """Pseudocode for key algorithms, annotated to highlight innovative components."""

    highlighted_figures: List[Figure] = Field(default_factory=list)
    """Critical diagrams or illustrations, each accompanied by a caption explaining their academic importance."""

    highlighted_tables: List[str] = Field(default_factory=list)
    """Important data tables, annotated to indicate statistical significance or other notable findings."""

    # Academic Discussion Dimensions
    research_problem: str = Field("")
    """A clearly defined research question or problem addressed by the study."""

    limitations: List[str] = Field(default_factory=list)
    """An analysis of the methodological or experimental limitations of the research."""

    future_work: List[str] = Field(default_factory=list)
    """Suggestions for potential directions or topics for follow-up studies."""

    impact_analysis: str = Field("")
    """An assessment of the paper's potential influence on the development of the field."""
94
+
95
+
96
+ class ExtractArticleEssence(Action):
97
+ """Extract the essence of article(s)."""
98
+
99
+ name: str = "extract article essence"
100
+ """The name of the action."""
101
+ description: str = "Extract the essence of an article. output as json"
102
+ """The description of the action."""
103
+
104
+ output_key: str = "article_essence"
105
+ """The key of the output data."""
106
+
107
+ async def _execute[P: PathLike | str](
108
+ self,
109
+ task_input: Task,
110
+ reader: Callable[[P], str] = lambda p: Path(p).read_text(encoding="utf-8"),
111
+ **_,
112
+ ) -> List[ArticleEssence]:
113
+ if not await self.ajudge(
114
+ f"= Task\n{task_input.briefing}\n\n\n= Role\n{self.briefing}",
115
+ affirm_case="The task does not violate the role, and could be approved since the file dependencies are specified.",
116
+ deny_case="The task does violate the role, and could not be approved.",
117
+ ):
118
+ logger.info(err := "Task not approved.")
119
+ raise RuntimeError(err)
120
+
121
+ # trim the references
122
+ contents = ["References".join(c.split("References")[:-1]) for c in map(reader, task_input.dependencies)]
123
+ return await self.propose(
124
+ ArticleEssence,
125
+ contents,
126
+ system_message=f"# your personal briefing: \n{self.briefing}",
127
+ )
@@ -0,0 +1,55 @@
1
+ """A module for the propose capability of the Fabricatio library."""
2
+
3
+ from typing import List, Type, Unpack, overload
4
+
5
+ from fabricatio.models.generic import ProposedAble
6
+ from fabricatio.models.kwargs_types import GenerateKwargs
7
+ from fabricatio.models.usages import LLMUsage
8
+
9
+
10
+ class Propose[M: ProposedAble](LLMUsage):
11
+ """A class that proposes an Obj based on a prompt."""
12
+
13
+ @overload
14
+ async def propose(
15
+ self,
16
+ cls: Type[M],
17
+ prompt: List[str],
18
+ **kwargs: Unpack[GenerateKwargs],
19
+ ) -> List[M]: ...
20
+
21
+ @overload
22
+ async def propose(
23
+ self,
24
+ cls: Type[M],
25
+ prompt: str,
26
+ **kwargs: Unpack[GenerateKwargs],
27
+ ) -> M: ...
28
+
29
+ async def propose(
30
+ self,
31
+ cls: Type[M],
32
+ prompt: List[str] | str,
33
+ **kwargs: Unpack[GenerateKwargs],
34
+ ) -> List[M] | M:
35
+ """Asynchronously proposes a task based on a given prompt and parameters.
36
+
37
+ Parameters:
38
+ cls: The class type of the task to be proposed.
39
+ prompt: The prompt text for proposing a task, which is a string that must be provided.
40
+ **kwargs: The keyword arguments for the LLM (Large Language Model) usage.
41
+
42
+ Returns:
43
+ A Task object based on the proposal result.
44
+ """
45
+ if isinstance(prompt, str):
46
+ return await self.aask_validate(
47
+ question=cls.create_json_prompt(prompt),
48
+ validator=cls.instantiate_from_string,
49
+ **kwargs,
50
+ )
51
+ return await self.aask_validate_batch(
52
+ questions=[cls.create_json_prompt(p) for p in prompt],
53
+ validator=cls.instantiate_from_string,
54
+ **kwargs,
55
+ )
@@ -1,71 +1,114 @@
1
1
  """A module for the RAG (Retrieval Augmented Generation) model."""
2
2
 
3
+ try:
4
+ from pymilvus import MilvusClient
5
+ except ImportError as e:
6
+ raise RuntimeError("pymilvus is not installed. Have you installed `fabricatio[rag]` instead of `fabricatio`") from e
3
7
  from functools import lru_cache
4
8
  from operator import itemgetter
5
9
  from os import PathLike
6
10
  from pathlib import Path
7
- from typing import Any, Callable, Dict, List, Optional, Self, Union, Unpack
11
+ from typing import Any, Callable, Dict, List, Optional, Self, Union, Unpack, overload
8
12
 
9
- from fabricatio import template_manager
13
+ from fabricatio._rust_instances import template_manager
10
14
  from fabricatio.config import configs
11
- from fabricatio.models.kwargs_types import LLMKwargs
12
- from fabricatio.models.usages import LLMUsage
15
+ from fabricatio.journal import logger
16
+ from fabricatio.models.kwargs_types import CollectionSimpleConfigKwargs, EmbeddingKwargs, FetchKwargs, LLMKwargs
17
+ from fabricatio.models.usages import EmbeddingUsage
13
18
  from fabricatio.models.utils import MilvusData
14
19
  from more_itertools.recipes import flatten
15
-
16
- try:
17
- from pymilvus import MilvusClient
18
- except ImportError as e:
19
- raise RuntimeError("pymilvus is not installed. Have you installed `fabricatio[rag]` instead of `fabricatio`") from e
20
20
  from pydantic import Field, PrivateAttr
21
21
 
22
22
 
23
23
  @lru_cache(maxsize=None)
24
- def create_client(
25
- uri: Optional[str] = None, token: Optional[str] = None, timeout: Optional[float] = None
26
- ) -> MilvusClient:
24
+ def create_client(uri: str, token: str = "", timeout: Optional[float] = None) -> MilvusClient:
27
25
  """Create a Milvus client."""
28
26
  return MilvusClient(
29
- uri=uri or configs.rag.milvus_uri.unicode_string(),
30
- token=token or configs.rag.milvus_token.get_secret_value() if configs.rag.milvus_token else "",
31
- timeout=timeout or configs.rag.milvus_timeout,
27
+ uri=uri,
28
+ token=token,
29
+ timeout=timeout,
32
30
  )
33
31
 
34
32
 
35
- class Rag(LLMUsage):
33
+ class RAG(EmbeddingUsage):
36
34
  """A class representing the RAG (Retrieval Augmented Generation) model."""
37
35
 
38
- milvus_uri: Optional[str] = Field(default=None, frozen=True)
39
- """The URI of the Milvus server."""
40
- milvus_token: Optional[str] = Field(default=None, frozen=True)
41
- """The token for the Milvus server."""
42
- milvus_timeout: Optional[float] = Field(default=None, frozen=True)
43
- """The timeout for the Milvus server."""
44
36
  target_collection: Optional[str] = Field(default=None)
45
37
  """The name of the collection being viewed."""
46
38
 
47
- _client: MilvusClient = PrivateAttr(None)
39
+ _client: Optional[MilvusClient] = PrivateAttr(None)
48
40
  """The Milvus client used for the RAG model."""
49
41
 
50
42
  @property
51
43
  def client(self) -> MilvusClient:
52
44
  """Return the Milvus client."""
45
+ if self._client is None:
46
+ raise RuntimeError("Client is not initialized. Have you called `self.init_client()`?")
53
47
  return self._client
54
48
 
55
- def model_post_init(self, __context: Any) -> None:
56
- """Initialize the RAG model by creating the collection if it does not exist."""
57
- self._client = create_client(self.milvus_uri, self.milvus_token, self.milvus_timeout)
58
- self.view(self.target_collection, create=True)
49
def init_client(
    self,
    milvus_uri: Optional[str] = None,
    milvus_token: Optional[str] = None,
    milvus_timeout: Optional[float] = None,
) -> Self:
    """Initialize the Milvus client.

    Each setting is resolved in order: explicit argument, then the
    instance attribute, then the global `configs.rag` value.

    Args:
        milvus_uri: URI of the Milvus server.
        milvus_token: Token for the Milvus server; an empty string is used
            when no token is configured anywhere.
        milvus_timeout: Timeout passed to the Milvus client.

    Returns:
        Self: The current instance, allowing for method chaining.
    """
    uri = milvus_uri or (self.milvus_uri or configs.rag.milvus_uri).unicode_string()

    token = milvus_token
    if not token:
        # Instance/config tokens are secret values and must be unwrapped.
        secret = self.milvus_token or configs.rag.milvus_token
        token = secret.get_secret_value() if secret else ""

    # Fall back to the global config like uri/token do; previously the
    # configured `configs.rag.milvus_timeout` was never consulted here.
    timeout = milvus_timeout or self.milvus_timeout or configs.rag.milvus_timeout

    # `create_client` is lru_cached, so identical settings reuse one client.
    self._client = create_client(uri=uri, token=token, timeout=timeout)
    return self
63
+
64
+ @overload
65
+ async def pack(
66
+ self, input_text: List[str], subject: Optional[str] = None, **kwargs: Unpack[EmbeddingKwargs]
67
+ ) -> List[MilvusData]: ...
68
+ @overload
69
+ async def pack(
70
+ self, input_text: str, subject: Optional[str] = None, **kwargs: Unpack[EmbeddingKwargs]
71
+ ) -> MilvusData: ...
72
+
73
+ async def pack(
74
+ self, input_text: List[str] | str, subject: Optional[str] = None, **kwargs: Unpack[EmbeddingKwargs]
75
+ ) -> List[MilvusData] | MilvusData:
76
+ """Asynchronously generates MilvusData objects for the given input text.
59
77
 
60
- def view(self, collection_name: Optional[str], create: bool = False) -> Self:
78
+ Args:
79
+ input_text (List[str] | str): A string or list of strings to generate embeddings for.
80
+ subject (Optional[str]): The subject of the input text. Defaults to None.
81
+ **kwargs (Unpack[EmbeddingKwargs]): Additional keyword arguments for embedding.
82
+
83
+ Returns:
84
+ List[MilvusData] | MilvusData: The generated MilvusData objects.
85
+ """
86
+ if isinstance(input_text, str):
87
+ return MilvusData(vector=await self.vectorize(input_text, **kwargs), text=input_text, subject=subject)
88
+ vecs = await self.vectorize(input_text, **kwargs)
89
+ return [
90
+ MilvusData(
91
+ vector=vec,
92
+ text=text,
93
+ subject=subject,
94
+ )
95
+ for text, vec in zip(input_text, vecs, strict=True)
96
+ ]
97
+
98
+ def view(
99
+ self, collection_name: Optional[str], create: bool = False, **kwargs: Unpack[CollectionSimpleConfigKwargs]
100
+ ) -> Self:
61
101
  """View the specified collection.
62
102
 
63
103
  Args:
64
104
  collection_name (str): The name of the collection.
65
105
  create (bool): Whether to create the collection if it does not exist.
106
+ **kwargs (Unpack[CollectionSimpleConfigKwargs]): Additional keyword arguments for collection configuration.
66
107
  """
67
108
  if create and collection_name and not self._client.has_collection(collection_name):
68
- self._client.create_collection(collection_name)
109
+ kwargs["dimension"] = kwargs.get("dimension") or self.milvus_dimensions or configs.rag.milvus_dimensions
110
+ self._client.create_collection(collection_name, auto_id=True, **kwargs)
111
+ logger.info(f"Creating collection {collection_name}")
69
112
 
70
113
  self.target_collection = collection_name
71
114
  return self
@@ -90,13 +133,14 @@ class Rag(LLMUsage):
90
133
  return self.target_collection
91
134
 
92
135
  def add_document[D: Union[Dict[str, Any], MilvusData]](
93
- self, data: D | List[D], collection_name: Optional[str] = None
136
+ self, data: D | List[D], collection_name: Optional[str] = None, flush: bool = False
94
137
  ) -> Self:
95
138
  """Adds a document to the specified collection.
96
139
 
97
140
  Args:
98
141
  data (Union[Dict[str, Any], MilvusData] | List[Union[Dict[str, Any], MilvusData]]): The data to be added to the collection.
99
142
  collection_name (Optional[str]): The name of the collection. If not provided, the currently viewed collection is used.
143
+ flush (bool): Whether to flush the collection after insertion.
100
144
 
101
145
  Returns:
102
146
  Self: The current instance, allowing for method chaining.
@@ -105,11 +149,19 @@ class Rag(LLMUsage):
105
149
  data = data.prepare_insertion()
106
150
  if isinstance(data, list):
107
151
  data = [d.prepare_insertion() if isinstance(d, MilvusData) else d for d in data]
108
- self._client.insert(collection_name or self.safe_target_collection, data)
152
+ c_name = collection_name or self.safe_target_collection
153
+ self._client.insert(c_name, data)
154
+
155
+ if flush:
156
+ logger.debug(f"Flushing collection {c_name}")
157
+ self._client.flush(c_name)
109
158
  return self
110
159
 
111
- def consume(
112
- self, source: PathLike, reader: Callable[[PathLike], MilvusData], collection_name: Optional[str] = None
160
+ async def consume_file(
161
+ self,
162
+ source: List[PathLike] | PathLike,
163
+ reader: Callable[[PathLike], str] = lambda path: Path(path).read_text(encoding="utf-8"),
164
+ collection_name: Optional[str] = None,
113
165
  ) -> Self:
114
166
  """Consume a file and add its content to the collection.
115
167
 
@@ -121,8 +173,21 @@ class Rag(LLMUsage):
121
173
  Returns:
122
174
  Self: The current instance, allowing for method chaining.
123
175
  """
124
- data = reader(Path(source))
125
- self.add_document(data, collection_name or self.safe_target_collection)
176
+ if not isinstance(source, list):
177
+ source = [source]
178
+ return await self.consume_string([reader(s) for s in source], collection_name)
179
+
180
+ async def consume_string(self, text: List[str] | str, collection_name: Optional[str] = None) -> Self:
181
+ """Consume a string and add it to the collection.
182
+
183
+ Args:
184
+ text (List[str] | str): The text to be added to the collection.
185
+ collection_name (Optional[str]): The name of the collection. If not provided, the currently viewed collection is used.
186
+
187
+ Returns:
188
+ Self: The current instance, allowing for method chaining.
189
+ """
190
+ self.add_document(await self.pack(text), collection_name or self.safe_target_collection, flush=True)
126
191
  return self
127
192
 
128
193
  async def afetch_document(
@@ -130,6 +195,7 @@ class Rag(LLMUsage):
130
195
  vecs: List[List[float]],
131
196
  desired_fields: List[str] | str,
132
197
  collection_name: Optional[str] = None,
198
+ similarity_threshold: float = 0.37,
133
199
  result_per_query: int = 10,
134
200
  ) -> List[Dict[str, Any]] | List[Any]:
135
201
  """Fetch data from the collection.
@@ -138,6 +204,7 @@ class Rag(LLMUsage):
138
204
  vecs (List[List[float]]): The vectors to search for.
139
205
  desired_fields (List[str] | str): The fields to retrieve.
140
206
  collection_name (Optional[str]): The name of the collection. If not provided, the currently viewed collection is used.
207
+ similarity_threshold (float): The threshold for similarity, only results above this threshold will be returned.
141
208
  result_per_query (int): The number of results to return per query.
142
209
 
143
210
  Returns:
@@ -147,6 +214,7 @@ class Rag(LLMUsage):
147
214
  search_results = self._client.search(
148
215
  collection_name or self.safe_target_collection,
149
216
  vecs,
217
+ search_params={"radius": similarity_threshold},
150
218
  output_fields=desired_fields if isinstance(desired_fields, list) else [desired_fields],
151
219
  limit=result_per_query,
152
220
  )
@@ -157,6 +225,7 @@ class Rag(LLMUsage):
157
225
  # Step 3: Sort by distance (descending)
158
226
  sorted_results = sorted(flattened_results, key=itemgetter("distance"), reverse=True)
159
227
 
228
+ logger.debug(f"Searched similarities: {[t['distance'] for t in sorted_results]}")
160
229
  # Step 4: Extract the entities
161
230
  resp = [result["entity"] for result in sorted_results]
162
231
 
@@ -168,27 +237,29 @@ class Rag(LLMUsage):
168
237
  self,
169
238
  query: List[str] | str,
170
239
  collection_name: Optional[str] = None,
171
- result_per_query: int = 10,
172
240
  final_limit: int = 20,
241
+ **kwargs: Unpack[FetchKwargs],
173
242
  ) -> List[str]:
174
243
  """Retrieve data from the collection.
175
244
 
176
245
  Args:
177
246
  query (List[str] | str): The query to be used for retrieval.
178
247
  collection_name (Optional[str]): The name of the collection. If not provided, the currently viewed collection is used.
179
- result_per_query (int): The number of results to be returned per query.
180
248
  final_limit (int): The final limit on the number of results to return.
249
+ **kwargs (Unpack[FetchKwargs]): Additional keyword arguments for retrieval.
181
250
 
182
251
  Returns:
183
252
  List[str]: A list of strings containing the retrieved data.
184
253
  """
185
254
  if isinstance(query, str):
186
255
  query = [query]
187
- return await self.afetch_document(
188
- vecs=(await self.vectorize(query)),
189
- desired_fields="text",
190
- collection_name=collection_name,
191
- result_per_query=result_per_query,
256
+ return (
257
+ await self.afetch_document(
258
+ vecs=(await self.vectorize(query)),
259
+ desired_fields="text",
260
+ collection_name=collection_name,
261
+ **kwargs,
262
+ )
192
263
  )[:final_limit]
193
264
 
194
265
  async def aask_retrieved(
@@ -196,8 +267,10 @@ class Rag(LLMUsage):
196
267
  question: str | List[str],
197
268
  query: List[str] | str,
198
269
  collection_name: Optional[str] = None,
270
+ extra_system_message: str = "",
199
271
  result_per_query: int = 10,
200
272
  final_limit: int = 20,
273
+ similarity_threshold: float = 0.37,
201
274
  **kwargs: Unpack[LLMKwargs],
202
275
  ) -> str:
203
276
  """Asks a question by retrieving relevant documents based on the provided query.
@@ -210,16 +283,28 @@ class Rag(LLMUsage):
210
283
  query (List[str] | str): The query or list of queries used for document retrieval.
211
284
  collection_name (Optional[str]): The name of the collection to retrieve documents from.
212
285
  If not provided, the currently viewed collection is used.
286
+ extra_system_message (str): An additional system message to be included in the prompt.
213
287
  result_per_query (int): The number of results to return per query. Default is 10.
214
288
  final_limit (int): The maximum number of retrieved documents to consider. Default is 20.
289
+ similarity_threshold (float): The threshold for similarity, only results above this threshold will be returned.
215
290
  **kwargs (Unpack[LLMKwargs]): Additional keyword arguments passed to the underlying `aask` method.
216
291
 
217
292
  Returns:
218
293
  str: A string response generated after asking with the context of retrieved documents.
219
294
  """
220
- docs = await self.aretrieve(query, collection_name, result_per_query, final_limit)
295
+ docs = await self.aretrieve(
296
+ query,
297
+ collection_name,
298
+ final_limit,
299
+ result_per_query=result_per_query,
300
+ similarity_threshold=similarity_threshold,
301
+ )
302
+
303
+ rendered = template_manager.render_template(configs.templates.retrieved_display_template, {"docs": docs[::-1]})
304
+
305
+ logger.debug(f"Retrieved Documents: \n{rendered}")
221
306
  return await self.aask(
222
307
  question,
223
- template_manager.render_template(configs.templates.retrieved_display_template, {"docs": docs}),
308
+ f"{rendered}\n\n{extra_system_message}",
224
309
  **kwargs,
225
310
  )
@@ -5,21 +5,21 @@ from typing import Any, Dict, List, Optional, Tuple, Unpack
5
5
 
6
6
  import orjson
7
7
  from fabricatio._rust_instances import template_manager
8
+ from fabricatio.capabilities.propose import Propose
8
9
  from fabricatio.config import configs
9
10
  from fabricatio.models.generic import WithBriefing
10
11
  from fabricatio.models.kwargs_types import ChooseKwargs, ValidateKwargs
11
12
  from fabricatio.models.task import Task
12
13
  from fabricatio.models.tool import Tool, ToolExecutor
13
- from fabricatio.models.usages import LLMUsage, ToolBoxUsage
14
+ from fabricatio.models.usages import ToolBoxUsage
14
15
  from fabricatio.parser import JsonCapture, PythonCapture
15
16
  from loguru import logger
16
- from pydantic import ValidationError
17
17
 
18
18
 
19
- class ProposeTask(WithBriefing, LLMUsage):
19
+ class ProposeTask(WithBriefing, Propose):
20
20
  """A class that proposes a task based on a prompt."""
21
21
 
22
- async def propose[T](
22
+ async def propose_task[T](
23
23
  self,
24
24
  prompt: str,
25
25
  **kwargs: Unpack[ValidateKwargs],
@@ -34,27 +34,10 @@ class ProposeTask(WithBriefing, LLMUsage):
34
34
  A Task object based on the proposal result.
35
35
  """
36
36
  if not prompt:
37
- err = f"{self.name}: Prompt must be provided."
38
- logger.error(err)
37
+ logger.error(err := f"{self.name}: Prompt must be provided.")
39
38
  raise ValueError(err)
40
39
 
41
- def _validate_json(response: str) -> None | Task:
42
- try:
43
- cap = JsonCapture.capture(response)
44
- logger.debug(f"Response: \n{response}")
45
- logger.info(f"Captured JSON: \n{cap}")
46
- return Task.model_validate_json(cap)
47
- except ValidationError as e:
48
- logger.error(f"Failed to parse task from JSON: {e}")
49
- return None
50
-
51
- template_data = {"prompt": prompt, "json_example": Task.json_example()}
52
- return await self.aask_validate(
53
- question=template_manager.render_template(configs.templates.propose_task_template, template_data),
54
- validator=_validate_json,
55
- system_message=f"# your personal briefing: \n{self.briefing}",
56
- **kwargs,
57
- )
40
+ return await self.propose(Task, prompt, system_message=f"# your personal briefing: \n{self.briefing}", **kwargs)
58
41
 
59
42
 
60
43
  class HandleTask(WithBriefing, ToolBoxUsage):
fabricatio/config.py CHANGED
@@ -80,6 +80,33 @@ class LLMConfig(BaseModel):
80
80
  """The maximum number of tokens to generate. Set to 8192 as per request."""
81
81
 
82
82
 
83
class EmbeddingConfig(BaseModel):
    """Embedding configuration class."""

    # With `use_attribute_docstrings`, pydantic uses the string literal that
    # follows each field as that field's description.
    model_config = ConfigDict(use_attribute_docstrings=True)

    # Plain defaults are equivalent to `Field(default=...)` when no other
    # field options are given.
    model: str = "text-embedding-ada-002"
    """The embedding model name. """

    dimensions: Optional[PositiveInt] = None
    """The dimensions of the embedding. None means not checked."""

    timeout: Optional[PositiveInt] = None
    """The timeout of the embedding model in seconds."""

    max_sequence_length: PositiveInt = 8192
    """The maximum sequence length of the embedding model. Default is 8192 as per request."""

    caching: bool = False
    """Whether to cache the embedding. Default is False."""

    api_endpoint: Optional[HttpUrl] = None
    """The OpenAI API endpoint."""

    api_key: Optional[SecretStr] = None
    """The OpenAI API key."""
+
109
+
83
110
  class PymitterConfig(BaseModel):
84
111
  """Pymitter configuration class.
85
112
 
@@ -140,8 +167,8 @@ class TemplateConfig(BaseModel):
140
167
  template_suffix: str = Field(default="hbs", frozen=True)
141
168
  """The suffix of the templates."""
142
169
 
143
- propose_task_template: str = Field(default="propose_task")
144
- """The name of the propose task template which will be used to propose a task."""
170
+ create_json_obj_template: str = Field(default="create_json_obj")
171
+ """The name of the create json object template which will be used to create a json object."""
145
172
 
146
173
  draft_tool_usage_code_template: str = Field(default="draft_tool_usage_code")
147
174
  """The name of the draft tool usage code template which will be used to draft tool usage code."""
@@ -179,6 +206,9 @@ class TemplateConfig(BaseModel):
179
206
  retrieved_display_template: str = Field(default="retrieved_display")
180
207
  """The name of the retrieved display template which will be used to display retrieved documents."""
181
208
 
209
+ liststr_template: str = Field(default="liststr")
210
+ """The name of the liststr template which will be used to display a list of strings."""
211
+
182
212
 
183
213
  class MagikaConfig(BaseModel):
184
214
  """Magika configuration class."""
@@ -222,6 +252,8 @@ class RagConfig(BaseModel):
222
252
  """The timeout of the Milvus server."""
223
253
  milvus_token: Optional[SecretStr] = Field(default=None)
224
254
  """The token of the Milvus server."""
255
+ milvus_dimensions: Optional[PositiveInt] = Field(default=None)
256
+ """The dimensions of the Milvus server."""
225
257
 
226
258
 
227
259
  class Settings(BaseSettings):
@@ -249,6 +281,9 @@ class Settings(BaseSettings):
249
281
  llm: LLMConfig = Field(default_factory=LLMConfig)
250
282
  """LLM Configuration"""
251
283
 
284
+ embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
285
+ """Embedding Configuration"""
286
+
252
287
  debug: DebugConfig = Field(default_factory=DebugConfig)
253
288
  """Debug Configuration"""
254
289
 
@@ -46,6 +46,7 @@ class Action(HandleTask, ProposeTask, GiveRating):
46
46
  cxt[self.output_key] = ret
47
47
  return cxt
48
48
 
49
+ @property
49
50
  def briefing(self) -> str:
50
51
  """Return a brief description of the action."""
51
52
  if self.personality: