fabricatio 0.2.6.dev3__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricatio/__init__.py +60 -0
- fabricatio/_rust.cp39-win_amd64.pyd +0 -0
- fabricatio/_rust.pyi +116 -0
- fabricatio/_rust_instances.py +10 -0
- fabricatio/actions/article.py +81 -0
- fabricatio/actions/output.py +19 -0
- fabricatio/actions/rag.py +25 -0
- fabricatio/capabilities/correct.py +115 -0
- fabricatio/capabilities/propose.py +49 -0
- fabricatio/capabilities/rag.py +369 -0
- fabricatio/capabilities/rating.py +339 -0
- fabricatio/capabilities/review.py +278 -0
- fabricatio/capabilities/task.py +113 -0
- fabricatio/config.py +400 -0
- fabricatio/core.py +181 -0
- fabricatio/decorators.py +179 -0
- fabricatio/fs/__init__.py +29 -0
- fabricatio/fs/curd.py +149 -0
- fabricatio/fs/readers.py +46 -0
- fabricatio/journal.py +21 -0
- fabricatio/models/action.py +158 -0
- fabricatio/models/events.py +120 -0
- fabricatio/models/extra.py +171 -0
- fabricatio/models/generic.py +406 -0
- fabricatio/models/kwargs_types.py +158 -0
- fabricatio/models/role.py +48 -0
- fabricatio/models/task.py +299 -0
- fabricatio/models/tool.py +189 -0
- fabricatio/models/usages.py +682 -0
- fabricatio/models/utils.py +167 -0
- fabricatio/parser.py +149 -0
- fabricatio/py.typed +0 -0
- fabricatio/toolboxes/__init__.py +15 -0
- fabricatio/toolboxes/arithmetic.py +62 -0
- fabricatio/toolboxes/fs.py +31 -0
- fabricatio/workflows/articles.py +15 -0
- fabricatio/workflows/rag.py +11 -0
- fabricatio-0.2.6.dev3.data/scripts/tdown.exe +0 -0
- fabricatio-0.2.6.dev3.dist-info/METADATA +432 -0
- fabricatio-0.2.6.dev3.dist-info/RECORD +42 -0
- fabricatio-0.2.6.dev3.dist-info/WHEEL +4 -0
- fabricatio-0.2.6.dev3.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,120 @@
|
|
1
|
+
"""The module containing the Event class."""
|
2
|
+
|
3
|
+
from typing import List, Self, Union
|
4
|
+
|
5
|
+
from fabricatio.config import configs
|
6
|
+
from fabricatio.models.utils import TaskStatus
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field
|
8
|
+
|
9
|
+
# Anything accepted where an event is expected: a delimiter-joined string,
# a list of segments, or an existing Event (see Event.instantiate_from).
type EventLike = Union[str, List[str], "Event"]
|
10
|
+
|
11
|
+
|
12
|
+
class Event(BaseModel):
    """A class representing an event."""

    model_config = ConfigDict(use_attribute_docstrings=True)

    # frozen=True only blocks re-assigning the attribute; the list itself is
    # mutated in place by push/pop/clear/concat below.
    segments: List[str] = Field(default_factory=list, frozen=True)
    """ The segments of the namespaces."""

    @classmethod
    def instantiate_from(cls, event: EventLike) -> "Event":
        """Create an Event instance from a string or list of strings or an Event instance.

        Args:
            event (EventLike): The event to instantiate from. A string is split on
                ``configs.pymitter.delimiter``, a list is passed as the segments,
                and an Event is cloned.

        Returns:
            Event: The Event instance.
        """
        if isinstance(event, Event):
            return event.clone()
        if isinstance(event, str):
            event = event.split(configs.pymitter.delimiter)

        return cls(segments=event)

    @classmethod
    def quick_instantiate(cls, event: EventLike) -> "Event":
        """Create an Event from an event-like value, then push a wildcard and a pending segment.

        Args:
            event (EventLike): The event to instantiate from.

        Returns:
            Event: The new Event, ending with ``"*"`` and the pending status segment.
        """
        return cls.instantiate_from(event).push_wildcard().push_pending()

    def derive(self, event: EventLike) -> Self:
        """Return a clone of this event with ``event`` concatenated; this event is not modified."""
        return self.clone().concat(event)

    def collapse(self) -> str:
        """Join the segments with ``configs.pymitter.delimiter`` into a single string."""
        return configs.pymitter.delimiter.join(self.segments)

    def clone(self) -> Self:
        """Return a new event of the same class holding a copy of the segment list."""
        return self.__class__(segments=list(self.segments))

    def push(self, segment: str) -> Self:
        """Append a segment to the event.

        Args:
            segment (str): The segment to append.

        Returns:
            Self: This event, for chaining.

        Raises:
            ValueError: If the segment is empty or contains the delimiter.
        """
        if not segment:
            raise ValueError("The segment must not be empty.")
        if configs.pymitter.delimiter in segment:
            raise ValueError("The segment must not contain the delimiter.")

        self.segments.append(segment)
        return self

    def push_wildcard(self) -> Self:
        """Push a wildcard segment to the event."""
        return self.push("*")

    def push_pending(self) -> Self:
        """Push a pending segment to the event."""
        return self.push(TaskStatus.Pending.value)

    def push_running(self) -> Self:
        """Push a running segment to the event."""
        return self.push(TaskStatus.Running.value)

    def push_finished(self) -> Self:
        """Push a finished segment to the event."""
        return self.push(TaskStatus.Finished.value)

    def push_failed(self) -> Self:
        """Push a failed segment to the event."""
        return self.push(TaskStatus.Failed.value)

    def push_cancelled(self) -> Self:
        """Push a cancelled segment to the event."""
        return self.push(TaskStatus.Cancelled.value)

    def pop(self) -> str:
        """Remove and return the last segment (raises IndexError when there are none)."""
        return self.segments.pop()

    def clear(self) -> Self:
        """Remove every segment and return this event."""
        self.segments.clear()
        return self

    def concat(self, event: EventLike) -> Self:
        """Append the segments of another event-like value to this event, in place."""
        self.segments.extend(Event.instantiate_from(event).segments)
        return self

    def __hash__(self) -> int:
        """Return the hash of the event, using the collapsed string."""
        # NOTE: the hash follows the current segments, so it changes whenever the
        # event is mutated.
        return hash(self.collapse())

    def __eq__(self, other: object) -> bool:
        """Check if the event is equal to another event, a string, or a list of strings.

        Anything else, including a list that holds non-string items, compares unequal.
        """
        if not isinstance(other, (str, list, Event)):
            return False
        # A list with non-string items can never describe an event; rejecting it
        # here keeps `==` from raising a validation error inside instantiate_from.
        if isinstance(other, list) and not all(isinstance(item, str) for item in other):
            return False
        return self.collapse() == Event.instantiate_from(other).collapse()
|
@@ -0,0 +1,171 @@
|
|
1
|
+
"""Extra models for built-in actions."""
|
2
|
+
|
3
|
+
from typing import List
|
4
|
+
|
5
|
+
from fabricatio.models.generic import Base, Display, FinalizedDumpAble, PrepareVectorization, ProposedAble
|
6
|
+
from pydantic import Field
|
7
|
+
|
8
|
+
|
9
|
+
class Equation(Base):
    """Structured representation of mathematical equations (including their physical or conceptual meanings)."""

    # Base enables `use_attribute_docstrings`, so the string literal under each
    # field doubles as that field's description. Both fields are required.

    description: str
    """A concise explanation of the equation's meaning, purpose, and relevance in the context of the research."""

    latex_code: str
    """The LaTeX code used to represent the equation in a publication-ready format."""
|
17
|
+
|
18
|
+
|
19
|
+
class Figure(Base):
    """Structured representation of figures (including their academic significance and explanatory captions)."""

    # Base enables `use_attribute_docstrings`, so the string literal under each
    # field doubles as that field's description. All three fields are required.

    description: str
    """A detailed explanation of the figure's content and its role in conveying key insights."""

    figure_caption: str
    """The caption accompanying the figure, summarizing its main points and academic value."""

    figure_path: str
    """The file path to the figure"""
|
30
|
+
|
31
|
+
|
32
|
+
class Highlightings(Base):
    """Structured representation of highlighted elements in an academic paper (including equations, algorithms, figures, and tables)."""

    # Every field is optional and starts as a fresh empty list (default_factory),
    # so Highlightings() can be built with no arguments.
    # The string literal under each field is its description, because Base
    # enables `use_attribute_docstrings`.

    # Academic Achievements Showcase
    highlighted_equations: List[Equation] = Field(default_factory=list)
    """Core mathematical equations that represent breakthroughs in the field, accompanied by explanations of their physical or conceptual significance,Should always be in LaTeX format wrapped in $ or $$ signs."""

    highlighted_algorithms: List[str] = Field(default_factory=list)
    """Pseudocode for key algorithms, annotated to highlight innovative components."""

    highlighted_figures: List[Figure] = Field(default_factory=list)
    """Critical diagrams or illustrations, each accompanied by a caption explaining their academic importance."""

    highlighted_tables: List[str] = Field(default_factory=list)
    """Important data tables, annotated to indicate statistical significance or other notable findings."""
|
47
|
+
|
48
|
+
|
49
|
+
class ArticleEssence(ProposedAble, Display, PrepareVectorization):
    """Structured representation of the core elements of an academic paper(providing a comprehensive digital profile of the paper's essential information)."""

    # The string literal under each field is its description (the bases derive
    # from Base, which enables `use_attribute_docstrings`).
    # Only `highlightings` has a default; every other field is required.

    # Basic Metadata
    title: str = Field(...)
    """The full title of the paper, including any subtitles if applicable."""

    authors: List[str]
    """A list of the paper's authors, typically in the order of contribution."""

    keywords: List[str]
    """A list of keywords that summarize the paper's focus and facilitate indexing."""

    publication_year: int
    """The year in which the paper was published."""

    # Core Content Elements
    highlightings: Highlightings = Field(default_factory=Highlightings)
    """A collection of highlighted elements in the paper, including equations, algorithms, figures, and tables."""

    domain: List[str]
    """The research domains or fields addressed by the paper (e.g., ['Natural Language Processing', 'Computer Vision'])."""

    abstract: str = Field(...)
    """A structured abstract that outlines the research problem, methodology, and conclusions in three distinct sections."""

    core_contributions: List[str]
    """Key academic contributions that distinguish the paper from prior work in the field."""

    technical_novelty: List[str]
    """Specific technical innovations introduced by the research, listed as individual points."""

    # Academic Discussion Dimensions
    research_problems: List[str]
    """A clearly defined research question or problem addressed by the study."""

    limitations: List[str]
    """An analysis of the methodological or experimental limitations of the research."""

    future_work: List[str]
    """Suggestions for potential directions or topics for follow-up studies."""

    impact_analysis: List[str]
    """An assessment of the paper's potential influence on the development of the field."""

    def _prepare_vectorization_inner(self) -> str:
        """Return the whole model serialized with ``model_dump_json()`` as the text to vectorize."""
        return self.model_dump_json()
|
96
|
+
|
97
|
+
|
98
|
+
class ArticleProposal(ProposedAble, Display):
    """Structured representation of the proposal for an academic paper."""

    # `title` is required; the three list fields default to fresh empty lists.
    # The string literal under each field is its description (the bases derive
    # from Base, which enables `use_attribute_docstrings`).

    title: str = Field(...)
    """The proposed title of the paper."""

    focused_problem: List[str] = Field(default_factory=list)
    """The specific research problem or question that the paper aims to address."""
    research_aim: List[str] = Field(default_factory=list)
    """The main objective or goal of the research, outlining what the study aims to achieve."""
    research_methods: List[str] = Field(default_factory=list)
    """The methods used in the research, including the approach, techniques, and tools employed."""
|
110
|
+
|
111
|
+
|
112
|
+
class ArticleSubsectionOutline(Base):
    """Structured representation of the subsections of an academic paper."""

    # Innermost level of the outline; both fields are required.
    # The string literal under each field is its description, because Base
    # enables `use_attribute_docstrings`.

    title: str = Field(...)
    """The title of the subsection."""

    description: str = Field(...)
    """A brief description of the subsection's content should be, how it fits into the overall structure of the paper, and its significance in the context of the research."""
|
120
|
+
|
121
|
+
|
122
|
+
class ArticleSectionOutline(Base):
    """Structured representation of the sections of an academic paper."""

    # Middle level of the outline: a section holds ArticleSubsectionOutline items.
    # `title` and `description` are required; `subsections` defaults to an empty list.
    # The string literal under each field is its description, because Base
    # enables `use_attribute_docstrings`.

    title: str = Field(...)
    """The title of the section."""
    description: str = Field(...)
    """A brief description of the section's content should be, how it fits into the overall structure of the paper, and its significance in the context of the research."""
    subsections: List[ArticleSubsectionOutline] = Field(default_factory=list)
    """The subsections of the section, outlining their content and significance."""
|
131
|
+
|
132
|
+
|
133
|
+
class ArticleChapterOutline(Base):
    """Structured representation of the chapters of an academic paper."""

    # Top level of the outline: a chapter holds ArticleSectionOutline items.
    # `title` and `description` are required; `sections` defaults to an empty list.
    # The string literal under each field is its description, because Base
    # enables `use_attribute_docstrings`.

    title: str = Field(...)
    """The title of the chapter."""
    description: str = Field(...)
    """A brief description of the chapter's content should be, how it fits into the overall structure of the paper, and its significance in the context of the research."""
    sections: List[ArticleSectionOutline] = Field(default_factory=list)
    """The sections of the chapter, outlining their content and significance."""
|
142
|
+
|
143
|
+
|
144
|
+
class ArticleOutline(ProposedAble, Display, FinalizedDumpAble):
    """Structured representation of the outline for an academic paper."""

    title: str = Field(...)
    """The proposed title of the paper."""

    prospect: str = Field(...)
    """A brief description of the research problem or question that the paper aims to address manipulating methods or techniques"""

    chapters: List[ArticleChapterOutline] = Field(default_factory=list)
    """The chapters of the paper, outlining their content and significance."""

    def finalized_dump(self) -> str:
        """Render the outline as a list of headings.

        Chapter, section and subsection titles are emitted depth-first, prefixed
        with ``=``, ``==`` and ``===`` respectively, and separated by blank lines.

        Returns:
            str: The heading lines joined with two newlines.
        """
        headings: List[str] = []

        for chapter in self.chapters:
            headings.append(f"= {chapter.title}")
            for section in chapter.sections:
                headings.append(f"== {section.title}")
                # Subsections are leaves, so they can be added in one go.
                headings.extend(f"=== {subsection.title}" for subsection in section.subsections)

        return "\n\n".join(headings)
|
@@ -0,0 +1,406 @@
|
|
1
|
+
"""This module defines generic classes for models in the Fabricatio library."""
|
2
|
+
|
3
|
+
from abc import abstractmethod
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Self, Union, final, overload
|
6
|
+
|
7
|
+
import orjson
|
8
|
+
from fabricatio._rust import blake3_hash
|
9
|
+
from fabricatio._rust_instances import TEMPLATE_MANAGER
|
10
|
+
from fabricatio.config import configs
|
11
|
+
from fabricatio.fs.readers import MAGIKA, safe_text_read
|
12
|
+
from fabricatio.journal import logger
|
13
|
+
from fabricatio.parser import JsonCapture
|
14
|
+
from pydantic import (
|
15
|
+
BaseModel,
|
16
|
+
ConfigDict,
|
17
|
+
Field,
|
18
|
+
HttpUrl,
|
19
|
+
NonNegativeFloat,
|
20
|
+
PositiveFloat,
|
21
|
+
PositiveInt,
|
22
|
+
SecretStr,
|
23
|
+
)
|
24
|
+
|
25
|
+
|
26
|
+
class Base(BaseModel):
    """Base class for all models with Pydantic configuration."""

    # use_attribute_docstrings: the string literal placed directly under a field
    # declaration is picked up by pydantic as that field's description.
    model_config = ConfigDict(use_attribute_docstrings=True)
|
30
|
+
|
31
|
+
|
32
|
+
class Display(Base):
    """Class that provides a method to display the model in a formatted JSON string."""

    def display(self) -> str:
        """Serialize the model to indented JSON for reading.

        Returns:
            str: The model as JSON with an indent of one space.
        """
        pretty = self.model_dump_json(indent=1)
        return pretty

    def compact(self) -> str:
        """Serialize the model to JSON without indentation.

        Returns:
            str: The model as single-line JSON.
        """
        # indent=None is the default of model_dump_json, spelled out here.
        return self.model_dump_json(indent=None)
|
50
|
+
|
51
|
+
|
52
|
+
class Named(Base):
    """Class that includes a name attribute."""

    # Required (no default) and frozen, i.e. not meant to be reassigned once
    # the object has been created.
    name: str = Field(frozen=True)
    """The name of the object."""
|
57
|
+
|
58
|
+
|
59
|
+
class Described(Base):
    """Class that includes a description attribute."""

    # Optional (defaults to the empty string) and frozen, i.e. not meant to be
    # reassigned once the object has been created.
    description: str = Field(default="", frozen=True)
    """The description of the object."""
|
64
|
+
|
65
|
+
|
66
|
+
class WithBriefing(Named, Described):
|
67
|
+
"""Class that provides a briefing based on the name and description."""
|
68
|
+
|
69
|
+
@property
|
70
|
+
def briefing(self) -> str:
|
71
|
+
"""Get the briefing of the object.
|
72
|
+
|
73
|
+
Returns:
|
74
|
+
str: The briefing of the object.
|
75
|
+
"""
|
76
|
+
return f"{self.name}: {self.description}" if self.description else self.name
|
77
|
+
|
78
|
+
def prepend[D: Dict[str, Any]](self, kwargs: D) -> D:
|
79
|
+
"""Prepend the briefing to the system message in the kwargs.
|
80
|
+
|
81
|
+
Args:
|
82
|
+
kwargs (Dict[str, Any]): The keyword arguments to modify.
|
83
|
+
|
84
|
+
Returns:
|
85
|
+
Dict[str, Any]: The modified keyword arguments.
|
86
|
+
"""
|
87
|
+
kwargs["system_message"] = f"# your personal briefing: \n{self.briefing}\n" + kwargs.get("system_message", "")
|
88
|
+
return kwargs
|
89
|
+
|
90
|
+
|
91
|
+
class WithFormatedJsonSchema(Base):
    """Class that provides a formatted JSON schema of the model."""

    @classmethod
    def formated_json_schema(cls) -> str:
        """Return the model's JSON schema as pretty-printed text.

        Returns:
            str: The schema serialized with two-space indentation and sorted keys.
        """
        schema = cls.model_json_schema()
        encoded = orjson.dumps(schema, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS)
        # orjson produces bytes; callers expect text.
        return encoded.decode()
|
105
|
+
|
106
|
+
|
107
|
+
class CreateJsonObjPrompt(WithFormatedJsonSchema):
    """Class that provides a prompt for creating a JSON object."""

    @classmethod
    @overload
    def create_json_prompt(cls, requirement: List[str]) -> List[str]: ...

    @classmethod
    @overload
    def create_json_prompt(cls, requirement: str) -> str: ...

    @classmethod
    def create_json_prompt(cls, requirement: str | List[str]) -> str | List[str]:
        """Create the prompt(s) for creating a JSON object with given requirement(s).

        Args:
            requirement (str | List[str]): A single requirement, or a list of
                requirements to render one prompt each.

        Returns:
            str | List[str]: One prompt for a string input; for a list input, a list
            of prompts in the same order as the requirements.
        """
        # The schema does not depend on the requirement, so build it once instead
        # of regenerating and re-serializing it for every item of a list.
        schema = cls.formated_json_schema()

        def render(single: str) -> str:
            """Render the JSON-object template for one requirement."""
            return TEMPLATE_MANAGER.render_template(
                configs.templates.create_json_obj_template,
                {"requirement": single, "json_schema": schema},
            )

        if isinstance(requirement, str):
            return render(requirement)
        return [render(r) for r in requirement]
|
140
|
+
|
141
|
+
|
142
|
+
class InstantiateFromString(Base):
    """Class that provides a method to instantiate the class from a string."""

    @classmethod
    def instantiate_from_string(cls, string: str) -> Self | None:
        """Build an instance from text via ``JsonCapture``.

        The text and this class's ``model_validate_json`` are handed to
        ``JsonCapture.convert_with``, whose result is returned unchanged.

        Args:
            string (str): The string to instantiate the class from.

        Returns:
            Self | None: The instance of the class or None if the string is not valid.
        """
        validator = cls.model_validate_json
        return JsonCapture.convert_with(string, validator)
|
156
|
+
|
157
|
+
|
158
|
+
class ProposedAble(CreateJsonObjPrompt, InstantiateFromString):
    """Class that provides a method to propose a JSON object based on the requirement."""

    # Pure combination of the two mixins: prompt creation plus parsing an
    # instance back out of a string. It adds no members of its own.
|
162
|
+
|
163
|
+
|
164
|
+
class FinalizedDumpAble(Base):
    """Class that provides a method to finalize the dump of the object."""

    @abstractmethod
    def finalized_dump(self) -> str:
        """Produce the final textual form of the object.

        Returns:
            str: The finalized dump of the object.
        """

    def finalized_dump_to(self, path: str | Path) -> Self:
        """Write the finalized dump to a file as UTF-8 text.

        Args:
            path (str | Path): The path to save the finalized dump.

        Returns:
            Self: The current instance of the object.
        """
        target = Path(path)
        target.write_text(self.finalized_dump(), encoding="utf-8")
        return self
|
186
|
+
|
187
|
+
|
188
|
+
class WithDependency(Base):
|
189
|
+
"""Class that manages file dependencies."""
|
190
|
+
|
191
|
+
dependencies: List[str] = Field(default_factory=list)
|
192
|
+
"""The file dependencies which is needed to read or write to meet a specific requirement, a list of file paths."""
|
193
|
+
|
194
|
+
def add_dependency[P: str | Path](self, dependency: P | List[P]) -> Self:
|
195
|
+
"""Add a file dependency to the task.
|
196
|
+
|
197
|
+
Args:
|
198
|
+
dependency (str | Path | List[str | Path]): The file dependency to add to the task.
|
199
|
+
|
200
|
+
Returns:
|
201
|
+
Self: The current instance of the task.
|
202
|
+
"""
|
203
|
+
if not isinstance(dependency, list):
|
204
|
+
dependency = [dependency]
|
205
|
+
self.dependencies.extend(Path(d).as_posix() for d in dependency)
|
206
|
+
return self
|
207
|
+
|
208
|
+
def remove_dependency[P: str | Path](self, dependency: P | List[P]) -> Self:
|
209
|
+
"""Remove a file dependency from the task.
|
210
|
+
|
211
|
+
Args:
|
212
|
+
dependency (str | Path | List[str | Path]): The file dependency to remove from the task.
|
213
|
+
|
214
|
+
Returns:
|
215
|
+
Self: The current instance of the task.
|
216
|
+
"""
|
217
|
+
if not isinstance(dependency, list):
|
218
|
+
dependency = [dependency]
|
219
|
+
for d in dependency:
|
220
|
+
self.dependencies.remove(Path(d).as_posix())
|
221
|
+
return self
|
222
|
+
|
223
|
+
def clear_dependencies(self) -> Self:
|
224
|
+
"""Clear all file dependencies from the task.
|
225
|
+
|
226
|
+
Returns:
|
227
|
+
Self: The current instance of the task.
|
228
|
+
"""
|
229
|
+
self.dependencies.clear()
|
230
|
+
return self
|
231
|
+
|
232
|
+
def override_dependencies[P: str | Path](self, dependencies: List[P] | P) -> Self:
|
233
|
+
"""Override the file dependencies of the task.
|
234
|
+
|
235
|
+
Args:
|
236
|
+
dependencies (List[str | Path] | str | Path): The file dependencies to override the task's dependencies.
|
237
|
+
|
238
|
+
Returns:
|
239
|
+
Self: The current instance of the task.
|
240
|
+
"""
|
241
|
+
return self.clear_dependencies().add_dependency(dependencies)
|
242
|
+
|
243
|
+
def pop_dependence[T](self, idx: int = -1, reader: Callable[[str], T] = safe_text_read) -> T:
|
244
|
+
"""Pop the file dependencies from the task.
|
245
|
+
|
246
|
+
Returns:
|
247
|
+
str: The popped file dependency
|
248
|
+
"""
|
249
|
+
return reader(self.dependencies.pop(idx))
|
250
|
+
|
251
|
+
@property
|
252
|
+
def dependencies_prompt(self) -> str:
|
253
|
+
"""Generate a prompt for the task based on the file dependencies.
|
254
|
+
|
255
|
+
Returns:
|
256
|
+
str: The generated prompt for the task.
|
257
|
+
"""
|
258
|
+
return TEMPLATE_MANAGER.render_template(
|
259
|
+
configs.templates.dependencies_template,
|
260
|
+
{
|
261
|
+
(pth := Path(p)).name: {
|
262
|
+
"path": pth.as_posix(),
|
263
|
+
"exists": pth.exists(),
|
264
|
+
"description": (identity := MAGIKA.identify_path(pth)).output.description,
|
265
|
+
"size": f"{pth.stat().st_size / (1024 * 1024) if pth.exists() and pth.is_file() else 0:.3f} MB",
|
266
|
+
"content": (text := safe_text_read(pth)),
|
267
|
+
"lines": len(text.splitlines()),
|
268
|
+
"language": identity.output.ct_label,
|
269
|
+
"checksum": blake3_hash(pth.read_bytes()) if pth.exists() and pth.is_file() else "unknown",
|
270
|
+
}
|
271
|
+
for p in self.dependencies
|
272
|
+
},
|
273
|
+
)
|
274
|
+
|
275
|
+
|
276
|
+
class PrepareVectorization(Base):
    """Class that prepares the vectorization of the model."""

    @abstractmethod
    def _prepare_vectorization_inner(self) -> str:
        """Return the text that represents this model for vectorization."""

    def prepare_vectorization(self, max_length: Optional[int] = None) -> str:
        """Return the text to vectorize after checking its length.

        Args:
            max_length (Optional[int]): Maximum allowed length of the text. A falsy
                value falls back to ``configs.embedding.max_sequence_length``.

        Returns:
            str: The text produced by ``_prepare_vectorization_inner``.

        Raises:
            ValueError: If the text is longer than the limit (also logged as an error).
        """
        if not max_length:
            max_length = configs.embedding.max_sequence_length
        chunk = self._prepare_vectorization_inner()
        if len(chunk) <= max_length:
            return chunk

        err = f"Chunk exceeds maximum sequence length {max_length}."
        logger.error(err)
        raise ValueError(err)
|
296
|
+
|
297
|
+
|
298
|
+
class ScopedConfig(Base):
    """Class that manages a scoped configuration."""

    llm_api_endpoint: Optional[HttpUrl] = None
    """The OpenAI API endpoint."""

    llm_api_key: Optional[SecretStr] = None
    """The OpenAI API key."""

    llm_timeout: Optional[PositiveInt] = None
    """The timeout of the LLM model."""

    llm_max_retries: Optional[PositiveInt] = None
    """The maximum number of retries."""

    llm_model: Optional[str] = None
    """The LLM model name."""

    llm_temperature: Optional[NonNegativeFloat] = None
    """The temperature of the LLM model."""

    llm_stop_sign: Optional[str | List[str]] = None
    """The stop sign of the LLM model."""

    llm_top_p: Optional[NonNegativeFloat] = None
    """The top p of the LLM model."""

    llm_generation_count: Optional[PositiveInt] = None
    """The number of generations to generate."""

    llm_stream: Optional[bool] = None
    """Whether to stream the LLM model's response."""

    llm_max_tokens: Optional[PositiveInt] = None
    """The maximum number of tokens to generate."""

    llm_tpm: Optional[PositiveInt] = None
    """The tokens per minute of the LLM model."""

    llm_rpm: Optional[PositiveInt] = None
    """The requests per minute of the LLM model."""

    embedding_api_endpoint: Optional[HttpUrl] = None
    """The OpenAI API endpoint."""

    embedding_api_key: Optional[SecretStr] = None
    """The OpenAI API key."""

    embedding_timeout: Optional[PositiveInt] = None
    """The timeout of the LLM model."""

    embedding_model: Optional[str] = None
    """The LLM model name."""

    embedding_max_sequence_length: Optional[PositiveInt] = None
    """The maximum sequence length."""

    embedding_dimensions: Optional[PositiveInt] = None
    """The dimensions of the embedding."""
    # NOTE(review): unlike every other field this defaults to False rather than
    # None, so fallback_to never replaces it and hold_to always pushes it onto
    # others whose value is None. Confirm that this is intended.
    embedding_caching: Optional[bool] = False
    """Whether to cache the embedding result."""

    milvus_uri: Optional[HttpUrl] = Field(default=None)
    """The URI of the Milvus server."""
    milvus_token: Optional[SecretStr] = Field(default=None)
    """The token for the Milvus server."""
    milvus_timeout: Optional[PositiveFloat] = Field(default=None)
    """The timeout for the Milvus server."""
    milvus_dimensions: Optional[PositiveInt] = Field(default=None)
    """The dimensions of the Milvus server."""

    @final
    def fallback_to(self, other: "ScopedConfig") -> Self:
        """Fill this instance's unset (None) fields from another instance.

        Only the fields declared on ScopedConfig are considered. A field is copied
        when it is None here and not None on ``other``.

        Args:
            other (ScopedConfig): The instance to copy missing values from.

        Returns:
            Self: The current instance, allowing for method chaining.
        """
        # noinspection PydanticTypeChecker,PyTypeChecker
        for attr_name in ScopedConfig.model_fields:
            if getattr(self, attr_name) is None and (attr := getattr(other, attr_name)) is not None:
                setattr(self, attr_name, attr)

        return self

    @final
    def hold_to(self, others: Union["ScopedConfig", Iterable["ScopedConfig"]]) -> Self:
        """Push this instance's set values onto the unset (None) fields of other instances.

        This is the mirror image of ``fallback_to``: the other instances are
        modified, this one is not.

        Args:
            others (ScopedConfig | Iterable[ScopedConfig]): One instance, or an
                iterable of instances, to receive the values.

        Returns:
            Self: The current instance, allowing for method chaining.
        """
        # A pydantic model is itself iterable (it yields (field name, value)
        # pairs), so an Iterable check alone would treat a single config as a
        # collection and then fail on the tuples. Test for the concrete type first.
        if isinstance(others, ScopedConfig) or not isinstance(others, Iterable):
            others = [others]
        for other in others:
            # noinspection PyTypeChecker,PydanticTypeChecker
            for attr_name in ScopedConfig.model_fields:
                if (attr := getattr(self, attr_name)) is not None and getattr(other, attr_name) is None:
                    setattr(other, attr_name, attr)
        return self
|