fabricatio 0.2.4.dev0__cp312-cp312-manylinux_2_34_x86_64.whl → 0.2.4.dev2__cp312-cp312-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/__init__.py +6 -2
- fabricatio/_rust.cpython-312-x86_64-linux-gnu.so +0 -0
- fabricatio/actions/article.py +8 -86
- fabricatio/actions/rag.py +25 -0
- fabricatio/capabilities/rating.py +12 -36
- fabricatio/fs/__init__.py +14 -2
- fabricatio/models/action.py +19 -4
- fabricatio/models/extra.py +96 -0
- fabricatio/models/generic.py +36 -0
- fabricatio/models/usages.py +15 -23
- fabricatio/parser.py +6 -10
- fabricatio/workflows/articles.py +11 -0
- fabricatio-0.2.4.dev2.data/scripts/tdown +0 -0
- {fabricatio-0.2.4.dev0.dist-info → fabricatio-0.2.4.dev2.dist-info}/METADATA +1 -1
- {fabricatio-0.2.4.dev0.dist-info → fabricatio-0.2.4.dev2.dist-info}/RECORD +17 -15
- fabricatio/actions/__init__.py +0 -5
- fabricatio-0.2.4.dev0.data/scripts/tdown +0 -0
- {fabricatio-0.2.4.dev0.dist-info → fabricatio-0.2.4.dev2.dist-info}/WHEEL +0 -0
- {fabricatio-0.2.4.dev0.dist-info → fabricatio-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
fabricatio/__init__.py
CHANGED
@@ -3,12 +3,13 @@
|
|
3
3
|
from importlib.util import find_spec
|
4
4
|
|
5
5
|
from fabricatio._rust_instances import template_manager
|
6
|
-
from fabricatio.actions import ExtractArticleEssence
|
6
|
+
from fabricatio.actions.article import ExtractArticleEssence
|
7
7
|
from fabricatio.core import env
|
8
8
|
from fabricatio.fs import magika
|
9
9
|
from fabricatio.journal import logger
|
10
10
|
from fabricatio.models.action import Action, WorkFlow
|
11
11
|
from fabricatio.models.events import Event
|
12
|
+
from fabricatio.models.extra import ArticleEssence
|
12
13
|
from fabricatio.models.role import Role
|
13
14
|
from fabricatio.models.task import Task
|
14
15
|
from fabricatio.models.tool import ToolBox
|
@@ -18,6 +19,7 @@ from fabricatio.toolboxes import arithmetic_toolbox, basic_toolboxes, fs_toolbox
|
|
18
19
|
|
19
20
|
__all__ = [
|
20
21
|
"Action",
|
22
|
+
"ArticleEssence",
|
21
23
|
"Capture",
|
22
24
|
"CodeBlockCapture",
|
23
25
|
"Event",
|
@@ -42,6 +44,8 @@ __all__ = [
|
|
42
44
|
|
43
45
|
|
44
46
|
if find_spec("pymilvus"):
|
47
|
+
from fabricatio.actions.rag import InjectToDB
|
45
48
|
from fabricatio.capabilities.rag import RAG
|
49
|
+
from fabricatio.workflows.articles import StoreArticle
|
46
50
|
|
47
|
-
__all__ += ["RAG"]
|
51
|
+
__all__ += ["RAG", "InjectToDB", "StoreArticle"]
|
Binary file
|
fabricatio/actions/article.py
CHANGED
@@ -4,101 +4,23 @@ from os import PathLike
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import Callable, List
|
6
6
|
|
7
|
-
from pydantic import BaseModel, Field
|
8
|
-
from pydantic.config import ConfigDict
|
9
|
-
|
10
7
|
from fabricatio.journal import logger
|
11
8
|
from fabricatio.models.action import Action
|
12
|
-
from fabricatio.models.generic import ProposedAble
|
9
|
+
from fabricatio.models.extra import ArticleEssence
|
13
10
|
from fabricatio.models.task import Task
|
14
11
|
|
15
12
|
|
16
|
-
class Equation(BaseModel):
|
17
|
-
"""Structured representation of mathematical equations (including their physical or conceptual meanings)."""
|
18
|
-
|
19
|
-
model_config = ConfigDict(use_attribute_docstrings=True)
|
20
|
-
|
21
|
-
description: str = Field(...)
|
22
|
-
"""A concise explanation of the equation's meaning, purpose, and relevance in the context of the research."""
|
23
|
-
|
24
|
-
latex_code: str = Field(...)
|
25
|
-
"""The LaTeX code used to represent the equation in a publication-ready format."""
|
26
|
-
|
27
|
-
|
28
|
-
class Figure(BaseModel):
|
29
|
-
"""Structured representation of figures (including their academic significance and explanatory captions)."""
|
30
|
-
|
31
|
-
model_config = ConfigDict(use_attribute_docstrings=True)
|
32
|
-
|
33
|
-
description: str = Field(...)
|
34
|
-
"""A detailed explanation of the figure's content and its role in conveying key insights."""
|
35
|
-
|
36
|
-
figure_caption: str = Field(...)
|
37
|
-
"""The caption accompanying the figure, summarizing its main points and academic value."""
|
38
|
-
|
39
|
-
|
40
|
-
class ArticleEssence(ProposedAble):
|
41
|
-
"""Structured representation of the core elements of an academic paper(providing a comprehensive digital profile of the paper's essential information)."""
|
42
|
-
|
43
|
-
# Basic Metadata
|
44
|
-
title: str = Field(...)
|
45
|
-
"""The full title of the paper, including any subtitles if applicable."""
|
46
|
-
|
47
|
-
authors: List[str] = Field(default_factory=list)
|
48
|
-
"""A list of the paper's authors, typically in the order of contribution."""
|
49
|
-
|
50
|
-
keywords: List[str] = Field(default_factory=list)
|
51
|
-
"""A list of keywords that summarize the paper's focus and facilitate indexing."""
|
52
|
-
|
53
|
-
publication_year: int = Field(None)
|
54
|
-
"""The year in which the paper was published."""
|
55
|
-
|
56
|
-
# Core Content Elements
|
57
|
-
domain: List[str] = Field(default_factory=list)
|
58
|
-
"""The research domains or fields addressed by the paper (e.g., ['Natural Language Processing', 'Computer Vision'])."""
|
59
|
-
|
60
|
-
abstract: str = Field(...)
|
61
|
-
"""A structured abstract that outlines the research problem, methodology, and conclusions in three distinct sections."""
|
62
|
-
|
63
|
-
core_contributions: List[str] = Field(default_factory=list)
|
64
|
-
"""Key academic contributions that distinguish the paper from prior work in the field."""
|
65
|
-
|
66
|
-
technical_novelty: List[str] = Field(default_factory=list)
|
67
|
-
"""Specific technical innovations introduced by the research, listed as individual points."""
|
68
|
-
|
69
|
-
# Academic Achievements Showcase
|
70
|
-
highlighted_equations: List[Equation] = Field(default_factory=list)
|
71
|
-
"""Core mathematical equations that represent breakthroughs in the field, accompanied by explanations of their physical or conceptual significance."""
|
72
|
-
|
73
|
-
highlighted_algorithms: List[str] = Field(default_factory=list)
|
74
|
-
"""Pseudocode for key algorithms, annotated to highlight innovative components."""
|
75
|
-
|
76
|
-
highlighted_figures: List[Figure] = Field(default_factory=list)
|
77
|
-
"""Critical diagrams or illustrations, each accompanied by a caption explaining their academic importance."""
|
78
|
-
|
79
|
-
highlighted_tables: List[str] = Field(default_factory=list)
|
80
|
-
"""Important data tables, annotated to indicate statistical significance or other notable findings."""
|
81
|
-
|
82
|
-
# Academic Discussion Dimensions
|
83
|
-
research_problem: str = Field("")
|
84
|
-
"""A clearly defined research question or problem addressed by the study."""
|
85
|
-
|
86
|
-
limitations: List[str] = Field(default_factory=list)
|
87
|
-
"""An analysis of the methodological or experimental limitations of the research."""
|
88
|
-
|
89
|
-
future_work: List[str] = Field(default_factory=list)
|
90
|
-
"""Suggestions for potential directions or topics for follow-up studies."""
|
91
|
-
|
92
|
-
impact_analysis: str = Field("")
|
93
|
-
"""An assessment of the paper's potential influence on the development of the field."""
|
94
|
-
|
95
|
-
|
96
13
|
class ExtractArticleEssence(Action):
|
97
|
-
"""Extract the essence of article(s).
|
14
|
+
"""Extract the essence of article(s) in text format from the paths specified in the task dependencies.
|
15
|
+
|
16
|
+
Notes:
|
17
|
+
This action is designed to extract vital information from articles with Markdown format, which is pure text, and
|
18
|
+
which is converted from pdf files using `magic-pdf` from the `MinerU` project, see https://github.com/opendatalab/MinerU
|
19
|
+
"""
|
98
20
|
|
99
21
|
name: str = "extract article essence"
|
100
22
|
"""The name of the action."""
|
101
|
-
description: str = "Extract the essence of
|
23
|
+
description: str = "Extract the essence of article(s) from the paths specified in the task dependencies."
|
102
24
|
"""The description of the action."""
|
103
25
|
|
104
26
|
output_key: str = "article_essence"
|
@@ -0,0 +1,25 @@
|
|
1
|
+
"""Inject data into the database."""
|
2
|
+
|
3
|
+
from typing import List, Optional, Unpack
|
4
|
+
|
5
|
+
from fabricatio.capabilities.rag import RAG
|
6
|
+
from fabricatio.models.action import Action
|
7
|
+
from fabricatio.models.generic import PrepareVectorization
|
8
|
+
|
9
|
+
|
10
|
+
class InjectToDB(Action, RAG):
|
11
|
+
"""Inject data into the database."""
|
12
|
+
|
13
|
+
output_key: str = "collection_name"
|
14
|
+
|
15
|
+
async def _execute[T: PrepareVectorization](
|
16
|
+
self, to_inject: T | List[T], collection_name: Optional[str] = "my_collection", **cxt: Unpack
|
17
|
+
) -> str:
|
18
|
+
if not isinstance(to_inject, list):
|
19
|
+
to_inject = [to_inject]
|
20
|
+
|
21
|
+
await self.view(collection_name, create=True).consume_string(
|
22
|
+
[t.prepare_vectorization(self.embedding_max_sequence_length) for t in to_inject],
|
23
|
+
)
|
24
|
+
|
25
|
+
return collection_name
|
@@ -131,8 +131,7 @@ class GiveRating(WithBriefing, LLMUsage):
|
|
131
131
|
|
132
132
|
def _validator(response: str) -> Dict[str, str] | None:
|
133
133
|
if (
|
134
|
-
(json_data := JsonCapture.convert_with(response, orjson.loads)) is not None
|
135
|
-
and isinstance(json_data, dict)
|
134
|
+
(json_data := JsonCapture.validate_with(response, target_type=dict, elements_type=str)) is not None
|
136
135
|
and json_data.keys() == criteria
|
137
136
|
and all(isinstance(v, str) for v in json_data.values())
|
138
137
|
):
|
@@ -173,11 +172,10 @@ class GiveRating(WithBriefing, LLMUsage):
|
|
173
172
|
|
174
173
|
def _validator(response: str) -> Set[str] | None:
|
175
174
|
if (
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
):
|
175
|
+
json_data := JsonCapture.validate_with(
|
176
|
+
response, target_type=list, elements_type=str, length=criteria_count
|
177
|
+
)
|
178
|
+
) is not None:
|
181
179
|
return set(json_data)
|
182
180
|
return None
|
183
181
|
|
@@ -219,27 +217,6 @@ class GiveRating(WithBriefing, LLMUsage):
|
|
219
217
|
Returns:
|
220
218
|
Set[str]: A set of drafted rating criteria.
|
221
219
|
"""
|
222
|
-
|
223
|
-
def _reasons_validator(response: str) -> List[str] | None:
|
224
|
-
if (
|
225
|
-
(json_data := JsonCapture.convert_with(response, orjson.loads)) is not None
|
226
|
-
and isinstance(json_data, list)
|
227
|
-
and all(isinstance(v, str) for v in json_data)
|
228
|
-
and len(json_data) == reasons_count
|
229
|
-
):
|
230
|
-
return json_data
|
231
|
-
return None
|
232
|
-
|
233
|
-
def _criteria_validator(response: str) -> Set[str] | None:
|
234
|
-
if (
|
235
|
-
(json_data := JsonCapture.convert_with(response, orjson.loads)) is not None
|
236
|
-
and isinstance(json_data, list)
|
237
|
-
and all(isinstance(v, str) for v in json_data)
|
238
|
-
and len(json_data) == criteria_count
|
239
|
-
):
|
240
|
-
return set(json_data)
|
241
|
-
return None
|
242
|
-
|
243
220
|
kwargs = GenerateKwargs(system_message=f"# your personal briefing: \n{self.briefing}", **kwargs)
|
244
221
|
# extract reasons from the comparison of ordered pairs of extracted from examples
|
245
222
|
reasons = flatten(
|
@@ -256,7 +233,9 @@ class GiveRating(WithBriefing, LLMUsage):
|
|
256
233
|
)
|
257
234
|
for pair in (permutations(examples, 2))
|
258
235
|
],
|
259
|
-
validator=_reasons_validator,
|
236
|
+
validator=lambda resp: JsonCapture.validate_with(
|
237
|
+
resp, target_type=list, elements_type=str, length=reasons_count
|
238
|
+
),
|
260
239
|
**kwargs,
|
261
240
|
)
|
262
241
|
)
|
@@ -272,7 +251,9 @@ class GiveRating(WithBriefing, LLMUsage):
|
|
272
251
|
},
|
273
252
|
)
|
274
253
|
),
|
275
|
-
validator=_criteria_validator,
|
254
|
+
validator=lambda resp: set(out)
|
255
|
+
if (out := JsonCapture.validate_with(resp, target_type=list, elements_type=str, length=criteria_count))
|
256
|
+
else None,
|
276
257
|
**kwargs,
|
277
258
|
)
|
278
259
|
|
@@ -295,11 +276,6 @@ class GiveRating(WithBriefing, LLMUsage):
|
|
295
276
|
if len(criteria) < 2: # noqa: PLR2004
|
296
277
|
raise ValueError("At least two criteria are required to draft rating weights")
|
297
278
|
|
298
|
-
def _validator(resp: str) -> float | None:
|
299
|
-
if (cap := JsonCapture.convert_with(resp, orjson.loads)) is not None and isinstance(cap, float):
|
300
|
-
return cap
|
301
|
-
return None
|
302
|
-
|
303
279
|
criteria = list(criteria) # freeze the order
|
304
280
|
windows = windowed(criteria, 2)
|
305
281
|
|
@@ -316,7 +292,7 @@ class GiveRating(WithBriefing, LLMUsage):
|
|
316
292
|
)
|
317
293
|
for pair in windows
|
318
294
|
],
|
319
|
-
validator=_validator,
|
295
|
+
validator=lambda resp: JsonCapture.validate_with(resp, target_type=float),
|
320
296
|
**GenerateKwargs(system_message=f"# your personal briefing: \n{self.briefing}", **kwargs),
|
321
297
|
)
|
322
298
|
weights = [1]
|
fabricatio/fs/__init__.py
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
"""FileSystem manipulation module for Fabricatio."""
|
2
2
|
|
3
|
-
from fabricatio.fs.
|
3
|
+
from fabricatio.fs.curd import copy_file, create_directory, delete_directory, delete_file, dump_text, move_file, tree
|
4
|
+
from fabricatio.fs.readers import magika, safe_json_read, safe_text_read
|
4
5
|
|
5
|
-
__all__ = [
|
6
|
+
__all__ = [
|
7
|
+
"copy_file",
|
8
|
+
"create_directory",
|
9
|
+
"delete_directory",
|
10
|
+
"delete_file",
|
11
|
+
"dump_text",
|
12
|
+
"magika",
|
13
|
+
"move_file",
|
14
|
+
"safe_json_read",
|
15
|
+
"safe_text_read",
|
16
|
+
"tree",
|
17
|
+
]
|
fabricatio/models/action.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
import traceback
|
4
4
|
from abc import abstractmethod
|
5
5
|
from asyncio import Queue, create_task
|
6
|
-
from typing import Any, Dict, Self, Tuple, Type, Union, Unpack
|
6
|
+
from typing import Any, Dict, Self, Tuple, Type, Union, Unpack, final
|
7
7
|
|
8
8
|
from fabricatio.capabilities.rating import GiveRating
|
9
9
|
from fabricatio.capabilities.task import HandleTask, ProposeTask
|
@@ -17,11 +17,26 @@ from pydantic import Field, PrivateAttr
|
|
17
17
|
class Action(HandleTask, ProposeTask, GiveRating):
|
18
18
|
"""Class that represents an action to be executed in a workflow."""
|
19
19
|
|
20
|
+
name: str = Field(default="")
|
21
|
+
"""The name of the action."""
|
22
|
+
description: str = Field(default="")
|
23
|
+
"""The description of the action."""
|
20
24
|
personality: str = Field(default="")
|
21
25
|
"""The personality of whom the action belongs to."""
|
22
26
|
output_key: str = Field(default="")
|
23
27
|
"""The key of the output data."""
|
24
28
|
|
29
|
+
@final
|
30
|
+
def model_post_init(self, __context: Any) -> None:
|
31
|
+
"""Initialize the action by setting the name if not provided.
|
32
|
+
|
33
|
+
Args:
|
34
|
+
__context: The context to be used for initialization.
|
35
|
+
"""
|
36
|
+
self.name = self.name or self.__class__.__name__
|
37
|
+
|
38
|
+
self.description = self.description or self.__class__.__doc__ or ""
|
39
|
+
|
25
40
|
@abstractmethod
|
26
41
|
async def _execute(self, **cxt: Unpack) -> Any:
|
27
42
|
"""Execute the action with the provided arguments.
|
@@ -34,6 +49,7 @@ class Action(HandleTask, ProposeTask, GiveRating):
|
|
34
49
|
"""
|
35
50
|
pass
|
36
51
|
|
52
|
+
@final
|
37
53
|
async def act(self, cxt: Dict[str, Any]) -> Dict[str, Any]:
|
38
54
|
"""Perform the action by executing it and setting the output data.
|
39
55
|
|
@@ -92,9 +108,8 @@ class WorkFlow(WithBriefing, ToolBoxUsage):
|
|
92
108
|
Returns:
|
93
109
|
Self: The instance of the workflow with the injected personality.
|
94
110
|
"""
|
95
|
-
for a in self._instances:
|
96
|
-
|
97
|
-
a.personality = personality
|
111
|
+
for a in filter(lambda action: not action.personality, self._instances):
|
112
|
+
a.personality = personality
|
98
113
|
return self
|
99
114
|
|
100
115
|
async def serve(self, task: Task) -> None:
|
@@ -0,0 +1,96 @@
|
|
1
|
+
"""Extra models for built-in actions."""
|
2
|
+
|
3
|
+
from typing import List
|
4
|
+
|
5
|
+
from fabricatio.models.generic import Display, PrepareVectorization, ProposedAble
|
6
|
+
from pydantic import BaseModel, ConfigDict, Field
|
7
|
+
|
8
|
+
|
9
|
+
class Equation(BaseModel):
|
10
|
+
"""Structured representation of mathematical equations (including their physical or conceptual meanings)."""
|
11
|
+
|
12
|
+
model_config = ConfigDict(use_attribute_docstrings=True)
|
13
|
+
|
14
|
+
description: str = Field(...)
|
15
|
+
"""A concise explanation of the equation's meaning, purpose, and relevance in the context of the research."""
|
16
|
+
|
17
|
+
latex_code: str = Field(...)
|
18
|
+
"""The LaTeX code used to represent the equation in a publication-ready format."""
|
19
|
+
|
20
|
+
|
21
|
+
class Figure(BaseModel):
|
22
|
+
"""Structured representation of figures (including their academic significance and explanatory captions)."""
|
23
|
+
|
24
|
+
model_config = ConfigDict(use_attribute_docstrings=True)
|
25
|
+
|
26
|
+
description: str = Field(...)
|
27
|
+
"""A detailed explanation of the figure's content and its role in conveying key insights."""
|
28
|
+
|
29
|
+
figure_caption: str = Field(...)
|
30
|
+
"""The caption accompanying the figure, summarizing its main points and academic value."""
|
31
|
+
|
32
|
+
figure_path: str = Field(...)
|
33
|
+
"""The file path to the figure"""
|
34
|
+
|
35
|
+
|
36
|
+
class Highlightings(BaseModel):
|
37
|
+
"""Structured representation of highlighted elements in an academic paper (including equations, algorithms, figures, and tables)."""
|
38
|
+
|
39
|
+
# Academic Achievements Showcase
|
40
|
+
highlighted_equations: List[Equation] = Field(default_factory=list)
|
41
|
+
"""Core mathematical equations that represent breakthroughs in the field, accompanied by explanations of their physical or conceptual significance."""
|
42
|
+
|
43
|
+
highlighted_algorithms: List[str] = Field(default_factory=list)
|
44
|
+
"""Pseudocode for key algorithms, annotated to highlight innovative components."""
|
45
|
+
|
46
|
+
highlighted_figures: List[Figure] = Field(default_factory=list)
|
47
|
+
"""Critical diagrams or illustrations, each accompanied by a caption explaining their academic importance."""
|
48
|
+
|
49
|
+
highlighted_tables: List[str] = Field(default_factory=list)
|
50
|
+
"""Important data tables, annotated to indicate statistical significance or other notable findings."""
|
51
|
+
|
52
|
+
|
53
|
+
class ArticleEssence(ProposedAble, Display, PrepareVectorization):
|
54
|
+
"""Structured representation of the core elements of an academic paper(providing a comprehensive digital profile of the paper's essential information)."""
|
55
|
+
|
56
|
+
# Basic Metadata
|
57
|
+
title: str = Field(...)
|
58
|
+
"""The full title of the paper, including any subtitles if applicable."""
|
59
|
+
|
60
|
+
authors: List[str] = Field(default_factory=list)
|
61
|
+
"""A list of the paper's authors, typically in the order of contribution."""
|
62
|
+
|
63
|
+
keywords: List[str] = Field(default_factory=list)
|
64
|
+
"""A list of keywords that summarize the paper's focus and facilitate indexing."""
|
65
|
+
|
66
|
+
publication_year: int = Field(None)
|
67
|
+
"""The year in which the paper was published."""
|
68
|
+
|
69
|
+
# Core Content Elements
|
70
|
+
domain: List[str] = Field(default_factory=list)
|
71
|
+
"""The research domains or fields addressed by the paper (e.g., ['Natural Language Processing', 'Computer Vision'])."""
|
72
|
+
|
73
|
+
abstract: str = Field(...)
|
74
|
+
"""A structured abstract that outlines the research problem, methodology, and conclusions in three distinct sections."""
|
75
|
+
|
76
|
+
core_contributions: List[str] = Field(default_factory=list)
|
77
|
+
"""Key academic contributions that distinguish the paper from prior work in the field."""
|
78
|
+
|
79
|
+
technical_novelty: List[str] = Field(default_factory=list)
|
80
|
+
"""Specific technical innovations introduced by the research, listed as individual points."""
|
81
|
+
|
82
|
+
# Academic Discussion Dimensions
|
83
|
+
research_problem: str = Field("")
|
84
|
+
"""A clearly defined research question or problem addressed by the study."""
|
85
|
+
|
86
|
+
limitations: List[str] = Field(default_factory=list)
|
87
|
+
"""An analysis of the methodological or experimental limitations of the research."""
|
88
|
+
|
89
|
+
future_work: List[str] = Field(default_factory=list)
|
90
|
+
"""Suggestions for potential directions or topics for follow-up studies."""
|
91
|
+
|
92
|
+
impact_analysis: str = Field("")
|
93
|
+
"""An assessment of the paper's potential influence on the development of the field."""
|
94
|
+
|
95
|
+
def _prepare_vectorization_inner(self) -> str:
|
96
|
+
return self.model_dump_json()
|
fabricatio/models/generic.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
"""This module defines generic classes for models in the Fabricatio library."""
|
2
2
|
|
3
|
+
from abc import abstractmethod
|
3
4
|
from pathlib import Path
|
4
5
|
from typing import Callable, Iterable, List, Optional, Self, Union, final
|
5
6
|
|
@@ -8,6 +9,7 @@ from fabricatio._rust import blake3_hash
|
|
8
9
|
from fabricatio._rust_instances import template_manager
|
9
10
|
from fabricatio.config import configs
|
10
11
|
from fabricatio.fs.readers import magika, safe_text_read
|
12
|
+
from fabricatio.journal import logger
|
11
13
|
from fabricatio.parser import JsonCapture
|
12
14
|
from pydantic import (
|
13
15
|
BaseModel,
|
@@ -27,6 +29,18 @@ class Base(BaseModel):
|
|
27
29
|
model_config = ConfigDict(use_attribute_docstrings=True)
|
28
30
|
|
29
31
|
|
32
|
+
class Display(Base):
|
33
|
+
"""Class that provides a method to display the model in a formatted JSON string."""
|
34
|
+
|
35
|
+
def display(self) -> str:
|
36
|
+
"""Display the model in a formatted JSON string.
|
37
|
+
|
38
|
+
Returns:
|
39
|
+
str: The formatted JSON string of the model.
|
40
|
+
"""
|
41
|
+
return self.model_dump_json(indent=1)
|
42
|
+
|
43
|
+
|
30
44
|
class Named(Base):
|
31
45
|
"""Class that includes a name attribute."""
|
32
46
|
|
@@ -199,6 +213,28 @@ class WithDependency(Base):
|
|
199
213
|
)
|
200
214
|
|
201
215
|
|
216
|
+
class PrepareVectorization(Base):
|
217
|
+
"""Class that prepares the vectorization of the model."""
|
218
|
+
|
219
|
+
@abstractmethod
|
220
|
+
def _prepare_vectorization_inner(self) -> str:
|
221
|
+
"""Prepare the vectorization of the model."""
|
222
|
+
|
223
|
+
def prepare_vectorization(self, max_length: Optional[int] = None) -> str:
|
224
|
+
"""Prepare the vectorization of the model.
|
225
|
+
|
226
|
+
Returns:
|
227
|
+
str: The prepared vectorization of the model.
|
228
|
+
"""
|
229
|
+
max_length = max_length or configs.embedding.max_sequence_length
|
230
|
+
chunk = self._prepare_vectorization_inner()
|
231
|
+
if len(chunk) > max_length:
|
232
|
+
logger.error(err := f"Chunk exceeds maximum sequence length {max_length}.")
|
233
|
+
raise ValueError(err)
|
234
|
+
|
235
|
+
return chunk
|
236
|
+
|
237
|
+
|
202
238
|
class ScopedConfig(Base):
|
203
239
|
"""Class that manages a scoped configuration."""
|
204
240
|
|
fabricatio/models/usages.py
CHANGED
@@ -5,7 +5,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Self, Set, Type, Un
|
|
5
5
|
|
6
6
|
import asyncstdlib
|
7
7
|
import litellm
|
8
|
-
import orjson
|
9
8
|
from fabricatio._rust_instances import template_manager
|
10
9
|
from fabricatio.config import configs
|
11
10
|
from fabricatio.journal import logger
|
@@ -23,6 +22,7 @@ from litellm.types.utils import (
|
|
23
22
|
StreamingChoices,
|
24
23
|
)
|
25
24
|
from litellm.utils import CustomStreamWrapper
|
25
|
+
from more_itertools import duplicates_everseen
|
26
26
|
from pydantic import Field, NonNegativeInt, PositiveInt
|
27
27
|
|
28
28
|
|
@@ -212,11 +212,10 @@ class LLMUsage(ScopedConfig):
|
|
212
212
|
**kwargs,
|
213
213
|
)
|
214
214
|
) and (validated := validator(response)):
|
215
|
-
logger.debug(f"Successfully validated the response at {i}th attempt.
|
215
|
+
logger.debug(f"Successfully validated the response at {i}th attempt.")
|
216
216
|
return validated
|
217
|
-
|
218
|
-
|
219
|
-
raise ValueError("Failed to validate the response.")
|
217
|
+
logger.error(err := f"Failed to validate the response after {max_validations} attempts.")
|
218
|
+
raise ValueError(err)
|
220
219
|
|
221
220
|
async def aask_validate_batch[T](
|
222
221
|
self,
|
@@ -255,7 +254,7 @@ class LLMUsage(ScopedConfig):
|
|
255
254
|
configs.templates.liststr_template,
|
256
255
|
{"requirement": requirement, "k": k},
|
257
256
|
),
|
258
|
-
lambda resp: JsonCapture.validate_with(resp,
|
257
|
+
lambda resp: JsonCapture.validate_with(resp, target_type=list, elements_type=str, length=k),
|
259
258
|
**kwargs,
|
260
259
|
)
|
261
260
|
|
@@ -282,6 +281,9 @@ class LLMUsage(ScopedConfig):
|
|
282
281
|
- Ensures response compliance through JSON parsing and format validation.
|
283
282
|
- Relies on `aask_validate` to implement retry mechanisms with validation.
|
284
283
|
"""
|
284
|
+
if dup := duplicates_everseen(choices, key=lambda x: x.name):
|
285
|
+
logger.error(err := f"Redundant choices: {dup}")
|
286
|
+
raise ValueError(err)
|
285
287
|
prompt = template_manager.render_template(
|
286
288
|
configs.templates.make_choice_template,
|
287
289
|
{
|
@@ -291,19 +293,16 @@ class LLMUsage(ScopedConfig):
|
|
291
293
|
},
|
292
294
|
)
|
293
295
|
names = {c.name for c in choices}
|
296
|
+
|
294
297
|
logger.debug(f"Start choosing between {names} with prompt: \n{prompt}")
|
295
298
|
|
296
299
|
def _validate(response: str) -> List[T] | None:
|
297
|
-
ret = JsonCapture.convert_with(response, orjson.loads)
|
298
|
-
|
299
|
-
if not isinstance(ret, List) or (0 < k != len(ret)):
|
300
|
-
logger.error(f"Incorrect Type or length of response: \n{ret}")
|
301
|
-
return None
|
302
|
-
if any(n not in names for n in ret):
|
303
|
-
logger.error(f"Invalid choice in response: \n{ret}")
|
300
|
+
ret = JsonCapture.validate_with(response, target_type=List, elements_type=str, length=k)
|
301
|
+
if ret is None or set(ret) - names:
|
304
302
|
return None
|
305
|
-
|
306
|
-
|
303
|
+
return [
|
304
|
+
next(candidate for candidate in choices if candidate.name == candidate_name) for candidate_name in ret
|
305
|
+
]
|
307
306
|
|
308
307
|
return await self.aask_validate(
|
309
308
|
question=prompt,
|
@@ -357,19 +356,12 @@ class LLMUsage(ScopedConfig):
|
|
357
356
|
Returns:
|
358
357
|
bool: The judgment result (True or False) based on the AI's response.
|
359
358
|
"""
|
360
|
-
|
361
|
-
def _validate(response: str) -> bool | None:
|
362
|
-
ret = JsonCapture.convert_with(response, orjson.loads)
|
363
|
-
if not isinstance(ret, bool):
|
364
|
-
return None
|
365
|
-
return ret
|
366
|
-
|
367
359
|
return await self.aask_validate(
|
368
360
|
question=template_manager.render_template(
|
369
361
|
configs.templates.make_judgment_template,
|
370
362
|
{"prompt": prompt, "affirm_case": affirm_case, "deny_case": deny_case},
|
371
363
|
),
|
372
|
-
validator=_validate,
|
364
|
+
validator=lambda resp: JsonCapture.validate_with(resp, target_type=bool),
|
373
365
|
**kwargs,
|
374
366
|
)
|
375
367
|
|
fabricatio/parser.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
from typing import Any, Callable, Optional, Self, Tuple, Type
|
4
4
|
|
5
|
+
import orjson
|
5
6
|
import regex
|
6
7
|
from pydantic import BaseModel, ConfigDict, Field, PositiveInt, PrivateAttr, ValidationError
|
7
8
|
from regex import Pattern, compile
|
@@ -27,11 +28,7 @@ class Capture(BaseModel):
|
|
27
28
|
_compiled: Pattern = PrivateAttr()
|
28
29
|
|
29
30
|
def model_post_init(self, __context: Any) -> None:
|
30
|
-
"""Initialize the compiled
|
31
|
-
|
32
|
-
Args:
|
33
|
-
__context (Any): The context in which the model is initialized.
|
34
|
-
"""
|
31
|
+
"""Initialize the compiled pattern."""
|
35
32
|
self._compiled = compile(self.pattern, self.flags)
|
36
33
|
|
37
34
|
def capture(self, text: str) -> Tuple[str, ...] | str | None:
|
@@ -69,7 +66,6 @@ class Capture(BaseModel):
|
|
69
66
|
if (cap := self.capture(text)) is None:
|
70
67
|
return None
|
71
68
|
try:
|
72
|
-
logger.debug(f"Trying to convert: \n{cap}")
|
73
69
|
return convertor(cap)
|
74
70
|
except (ValueError, SyntaxError, ValidationError) as e:
|
75
71
|
logger.error(f"Failed to convert text using {convertor.__name__} to convert.\nerror: {e}\n {cap}")
|
@@ -78,19 +74,19 @@ class Capture(BaseModel):
|
|
78
74
|
def validate_with[K, T, E](
|
79
75
|
self,
|
80
76
|
text: str,
|
81
|
-
deserializer: Callable[[Tuple[str, ...]], K] | Callable[[str], K],
|
82
77
|
target_type: Type[T],
|
83
78
|
elements_type: Optional[Type[E]] = None,
|
84
79
|
length: Optional[int] = None,
|
80
|
+
deserializer: Callable[[Tuple[str, ...]], K] | Callable[[str], K] = orjson.loads,
|
85
81
|
) -> T | None:
|
86
82
|
"""Validate the given text using the pattern.
|
87
83
|
|
88
84
|
Args:
|
89
85
|
text (str): The text to search the pattern in.
|
90
|
-
|
91
|
-
|
92
|
-
elements_type (Optional[Type[E]]): The expected type of the elements in the output.
|
86
|
+
target_type (Type[T]): The expected type of the output, dict or list.
|
87
|
+
elements_type (Optional[Type[E]]): The expected type of the elements in the output dict keys or list elements.
|
93
88
|
length (Optional[int]): The expected length of the output, bool(length)==False means no length validation.
|
89
|
+
deserializer (Callable[[Tuple[str, ...]], K] | Callable[[str], K]): The function to deserialize the captured text.
|
94
90
|
|
95
91
|
Returns:
|
96
92
|
T | None: The validated text if the pattern is found and the output is of the expected type, otherwise None.
|
@@ -0,0 +1,11 @@
|
|
1
|
+
"""Store article essence in the database."""
|
2
|
+
|
3
|
+
from fabricatio.actions.article import ExtractArticleEssence
|
4
|
+
from fabricatio.actions.rag import InjectToDB
|
5
|
+
from fabricatio.models.action import WorkFlow
|
6
|
+
|
7
|
+
StoreArticle = WorkFlow(
|
8
|
+
name="Extract Article Essence",
|
9
|
+
description="Extract the essence of an article in the given path, and store it in the database.",
|
10
|
+
steps=(ExtractArticleEssence(output_key="to_inject"), InjectToDB(output_key="task_output")),
|
11
|
+
)
|
Binary file
|
@@ -1,17 +1,18 @@
|
|
1
|
-
fabricatio-0.2.4.
|
2
|
-
fabricatio-0.2.4.
|
3
|
-
fabricatio-0.2.4.
|
1
|
+
fabricatio-0.2.4.dev2.dist-info/METADATA,sha256=vhANe4uPf0Sg_UmcfdmjBD9GI44lY6XrlLz0OFf4bQk,8589
|
2
|
+
fabricatio-0.2.4.dev2.dist-info/WHEEL,sha256=RIvmwLDYujv60MYBx2jxyP4vdn1DD7X0kBgz1TQvZuc,108
|
3
|
+
fabricatio-0.2.4.dev2.dist-info/licenses/LICENSE,sha256=yDZaTLnOi03bi3Dk6f5IjhLUc5old2yOsihHWU0z-i0,1067
|
4
4
|
fabricatio/decorators.py,sha256=cJHsxxbnMhc4SzPl4454CPLuDP3H0qbTrzV_U2rLPrs,6372
|
5
5
|
fabricatio/core.py,sha256=MaEKZ6DDmbdScAY-7F1gwGA6fr7ADX6Mz5rNVi2msFA,6277
|
6
|
-
fabricatio/models/generic.py,sha256=
|
6
|
+
fabricatio/models/generic.py,sha256=bjAD8g4tz1VjnIfnpTD6sRu1dBuBoswmUrn6pCnL6aY,11504
|
7
7
|
fabricatio/models/tool.py,sha256=wgNXtobDSWZTVmIrSSaS5oyxhSUYWXhS0S2o9acE2ls,6800
|
8
8
|
fabricatio/models/role.py,sha256=fAYH8EYpt8ad2uKRV9PVjDNqHuGLxY6_s36mXM0S5I0,1782
|
9
|
+
fabricatio/models/extra.py,sha256=MC6l23iKv-DRkHDAZryFY9Rsvs244YVqe4VOHvO0-u8,4232
|
9
10
|
fabricatio/models/kwargs_types.py,sha256=ZxqJhLnwMhrV0La_q8IBi3DfRd9bHDS8vI_kukZaeUE,1727
|
10
11
|
fabricatio/models/utils.py,sha256=vahILaesw50ofFft-wZ9kZ_Qogqi6vIOISWczvwVXxk,4311
|
11
|
-
fabricatio/models/usages.py,sha256=
|
12
|
+
fabricatio/models/usages.py,sha256=iXh4MbDhrzshOxH704NY_5nbt7Lx5ckyBCs_nboxlM8,24943
|
12
13
|
fabricatio/models/events.py,sha256=sBCKeNoYc4TFDoke-jhFEyA11RcdGu-wdF5ynAuVOMM,3983
|
13
14
|
fabricatio/models/task.py,sha256=A0xdbrMeqhWqpmoMplTuaZmsKKHl93InZJ2iKjaRNBA,10683
|
14
|
-
fabricatio/models/action.py,sha256=
|
15
|
+
fabricatio/models/action.py,sha256=CYg4YPOo_NhAl8Nm8bCoEJkF4xPZZ-ZsVJmABrx2u_Q,6335
|
15
16
|
fabricatio/toolboxes/fs.py,sha256=oZjGOmtN8ZbkMXxCMatqvdPavVXyQ87AEDc1lx9jimY,483
|
16
17
|
fabricatio/toolboxes/__init__.py,sha256=iEBsH0aRPLccIe6eyZ6iH5GdK8yYNQTnCwLzw4pWXAA,465
|
17
18
|
fabricatio/toolboxes/arithmetic.py,sha256=sSTPkKI6-mb278DwQKFO9jKyzc9kCx45xNH7V6bGBpE,1307
|
@@ -19,19 +20,20 @@ fabricatio/toolboxes/task.py,sha256=9J3W-48CCEUh5PFz0XEC47Hv23Ugn1BEg-J_woQE1UA,
|
|
19
20
|
fabricatio/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
21
|
fabricatio/fs/readers.py,sha256=lqMrM5Nt3l6QJmPDoBno2PSaN2BbFwfUjBqaghrbK3A,1002
|
21
22
|
fabricatio/fs/curd.py,sha256=g74Pc2WPSkPlCXX0cWj2jyiDYTfkLwpwM480eyCkmc8,3851
|
22
|
-
fabricatio/fs/__init__.py,sha256=
|
23
|
+
fabricatio/fs/__init__.py,sha256=Jlzk4AvBFgcgQ6nIgGRSUIh2UZHtpl27eyu395lR3c0,448
|
23
24
|
fabricatio/config.py,sha256=ZUUJ5rb6ZBq8Vrkk9wqfMU8ncomRoHgarW7YuThEJK4,13188
|
24
25
|
fabricatio/journal.py,sha256=bzxZay48ZWI0VIkkDXm4Wc_Cc9lBQYa2VGx3Hxy_PtA,753
|
25
|
-
fabricatio/__init__.py,sha256=
|
26
|
-
fabricatio/actions/
|
27
|
-
fabricatio/actions/article.py,sha256=
|
26
|
+
fabricatio/__init__.py,sha256=gakP6hzHIQccKu9ByHhUP-cN36wCPEr11s13iTdO6EY,1503
|
27
|
+
fabricatio/actions/rag.py,sha256=tP1uKLq7mo19YhsJzFbGYryrcMx6xcvBc3n_Wzh0hjg,796
|
28
|
+
fabricatio/actions/article.py,sha256=ozqEa75_MS4OfCJZzcmF5_s3AJmAUrQGdHORWtZBsE0,1977
|
28
29
|
fabricatio/_rust_instances.py,sha256=JAtO-vL8ihvduf1SHLNf0w7ZSVGCJeIv6zZ9Ekyy1hY,271
|
29
|
-
fabricatio/
|
30
|
+
fabricatio/workflows/articles.py,sha256=KlTaMSnxkQqHAIl-zZ8_wMqheka7IL99nt7XzQxDSF0,462
|
31
|
+
fabricatio/parser.py,sha256=S-4p1mjvJZEgnQW6WKdkmE68MzqqWXGSiLbADdhE-W4,4791
|
30
32
|
fabricatio/capabilities/rag.py,sha256=bF-SSZRQbG00QjzoAXrWCfzFxHxfvxV3al_G8T4jxL8,12985
|
31
|
-
fabricatio/capabilities/rating.py,sha256=
|
33
|
+
fabricatio/capabilities/rating.py,sha256=E1obAS48B4j3Cf6rC6h-TyzRb4vQ2Hho2IM7630jSoI,13695
|
32
34
|
fabricatio/capabilities/propose.py,sha256=oW9QKpY2mDkVPEvgsgqxXxDR2ylVqN5TEL8E0Wh_vJI,1714
|
33
35
|
fabricatio/capabilities/task.py,sha256=BISAFbMgBI4udW0otE3-iyq0RXc05YN_30N0yI5qLxQ,4504
|
34
36
|
fabricatio/_rust.pyi,sha256=clhcURuiB9zlFo4m3VyoWQ8Xs4tvg6KNHXpF-ok9h4o,1703
|
35
|
-
fabricatio/_rust.cpython-312-x86_64-linux-gnu.so,sha256=
|
36
|
-
fabricatio-0.2.4.
|
37
|
-
fabricatio-0.2.4.
|
37
|
+
fabricatio/_rust.cpython-312-x86_64-linux-gnu.so,sha256=CDyz4oafWsO46rOc8DyHBfDVj4dY2LkheAQTGfSXmgs,1333952
|
38
|
+
fabricatio-0.2.4.dev2.data/scripts/tdown,sha256=lUUKFHytXf1um4ZhtzvZlMqpBPWN7IjAsKKT9IgDwog,4560064
|
39
|
+
fabricatio-0.2.4.dev2.dist-info/RECORD,,
|
fabricatio/actions/__init__.py
DELETED
Binary file
|
File without changes
|
File without changes
|