content-core 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of content-core might be problematic. Click here for more details.
- content_core/__init__.py +6 -2
- content_core/cc_config.yaml +35 -0
- content_core/common/state.py +4 -0
- content_core/config.py +23 -4
- content_core/content/extraction/graph.py +15 -1
- content_core/notebooks/docling.ipynb +27 -0
- content_core/notebooks/run.ipynb +74 -58
- content_core/processors/docling.py +72 -0
- content_core/templated_message.py +16 -24
- {content_core-0.4.0.dist-info → content_core-0.5.1.dist-info}/METADATA +56 -2
- {content_core-0.4.0.dist-info → content_core-0.5.1.dist-info}/RECORD +14 -12
- content_core/prompter.py +0 -159
- {content_core-0.4.0.dist-info → content_core-0.5.1.dist-info}/WHEEL +0 -0
- {content_core-0.4.0.dist-info → content_core-0.5.1.dist-info}/entry_points.txt +0 -0
- {content_core-0.4.0.dist-info → content_core-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Docling-based document extraction processor.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# Docling is an optional extra. Use the real converter when it is available;
# otherwise define a same-named placeholder so that a missing docling package
# does not make this import statement fail.
try:
    from docling.document_converter import DocumentConverter
except ImportError:

    class DocumentConverter:
        """
        Placeholder used when the docling package cannot be imported.

        Both construction and ``convert`` raise ``ImportError``, so the
        missing dependency is reported at the point of use.
        """

        # Single source for the error text raised by every entry point.
        _MISSING_MESSAGE = "Docling not installed"

        def __init__(self):
            raise ImportError(self._MISSING_MESSAGE)

        def convert(self, source: str):
            raise ImportError(self._MISSING_MESSAGE)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
from content_core.common.state import ProcessSourceState
|
|
20
|
+
from content_core.config import CONFIG
|
|
21
|
+
|
|
22
|
+
# MIME types supported for Docling extraction, grouped by document family.
DOCLING_SUPPORTED = {
    # PDF and Office Open XML documents
    "application/pdf",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    # Raster images
    "image/bmp",
    "image/jpeg",
    "image/png",
    "image/tiff",
    # Text-based formats
    "text/csv",
    "text/html",
    "text/markdown",
    "text/plain",
    "text/x-markdown",
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
async def extract_with_docling(state: ProcessSourceState) -> ProcessSourceState:
    """
    Convert the input referenced by ``state`` with Docling and store the export.

    The input is the first truthy value among ``state.file_path``,
    ``state.url`` and ``state.content``. The export format is resolved, in
    priority order, from ``state.output_format``,
    ``state.metadata["docling_format"]`` and the
    ``extraction.docling.output_format`` config entry (default ``"markdown"``).
    Format names are matched case-insensitively; anything other than
    ``"html"`` or ``"json"`` is exported as markdown.

    Args:
        state: Processing state carrying the input and optional format override.

    Returns:
        The same ``state`` object, with ``content`` replaced by the exported
        document and ``metadata["docling_format"]`` set to the format that
        was actually used for the export.

    Raises:
        ValueError: If ``state`` has no file path, URL or content.
    """
    # Supported format name -> name of the export method on the converted
    # document. Methods are resolved by name so that only the exporter that
    # is actually selected gets looked up on the document.
    export_methods = {
        "html": "export_to_html",
        "json": "export_to_json",
        "markdown": "export_to_markdown",
    }

    # Initialize Docling converter
    converter = DocumentConverter()

    # Determine source: file path, URL, or direct content
    source = state.file_path or state.url or state.content
    if not source:
        raise ValueError("No input provided for Docling extraction.")

    # NOTE(review): convert() is called synchronously inside this coroutine,
    # and raw ``state.content`` is passed through unchanged -- confirm the
    # converter accepts in-memory text and that blocking here is acceptable.
    result = converter.convert(source)
    doc = result.document

    # Determine output format (per execution override, metadata, then config)
    cfg_fmt = (
        CONFIG.get("extraction", {}).get("docling", {}).get("output_format", "markdown")
    )
    requested = state.output_format or state.metadata.get("docling_format") or cfg_fmt

    # Normalise the requested name and fall back to markdown for anything
    # unrecognised, so the recorded format always matches the real output.
    fmt = requested.strip().lower() if isinstance(requested, str) else ""
    if fmt not in export_methods:
        fmt = "markdown"

    # Record the format used
    state.metadata["docling_format"] = fmt

    # Update state
    state.content = getattr(doc, export_methods[fmt])()
    return state
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
from typing import Dict, Optional, Union
|
|
2
2
|
|
|
3
|
+
from ai_prompter import Prompter
|
|
3
4
|
from esperanto import LanguageModel
|
|
4
5
|
from esperanto.common_types import Message
|
|
5
6
|
from pydantic import BaseModel, Field
|
|
6
7
|
|
|
7
8
|
from content_core.models import ModelFactory
|
|
8
|
-
from content_core.prompter import Prompter
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class TemplatedMessageInput(BaseModel):
|
|
12
|
-
system_prompt_template: Optional[str] =
|
|
13
|
-
system_prompt_text: Optional[str] =
|
|
14
|
-
user_prompt_template: Optional[str] =
|
|
15
|
-
user_prompt_text: Optional[str] =
|
|
12
|
+
system_prompt_template: Optional[str] = None
|
|
13
|
+
system_prompt_text: Optional[str] = None
|
|
14
|
+
user_prompt_template: Optional[str] = None
|
|
15
|
+
user_prompt_text: Optional[str] = None
|
|
16
16
|
data: Optional[Union[Dict, BaseModel]] = Field(default_factory=lambda: {})
|
|
17
17
|
config: Dict = Field(
|
|
18
18
|
description="The config for the LLM",
|
|
@@ -28,30 +28,22 @@ async def templated_message(
|
|
|
28
28
|
input: TemplatedMessageInput, model: Optional[LanguageModel] = None
|
|
29
29
|
) -> str:
|
|
30
30
|
if not model:
|
|
31
|
-
model = ModelFactory.get_model(
|
|
31
|
+
model = ModelFactory.get_model("default_model")
|
|
32
32
|
|
|
33
33
|
msgs = []
|
|
34
34
|
if input.system_prompt_template or input.system_prompt_text:
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
prompt_text=input.system_prompt_text,
|
|
41
|
-
).render(data=input.data),
|
|
42
|
-
)
|
|
43
|
-
)
|
|
35
|
+
system_prompt = Prompter(
|
|
36
|
+
prompt_template=input.system_prompt_template,
|
|
37
|
+
template_text=input.system_prompt_text,
|
|
38
|
+
).render(data=input.data)
|
|
39
|
+
msgs.append(Message(role="system", content=system_prompt))
|
|
44
40
|
|
|
45
41
|
if input.user_prompt_template or input.user_prompt_text:
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
prompt_text=input.user_prompt_text,
|
|
52
|
-
).render(data=input.data),
|
|
53
|
-
)
|
|
54
|
-
)
|
|
42
|
+
user_prompt = Prompter(
|
|
43
|
+
prompt_template=input.user_prompt_template,
|
|
44
|
+
template_text=input.user_prompt_text,
|
|
45
|
+
).render(data=input.data)
|
|
46
|
+
msgs.append(Message(role="user", content=user_prompt))
|
|
55
47
|
|
|
56
48
|
result = await model.achat_complete(msgs)
|
|
57
49
|
return result.content
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: content-core
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: Extract what matters from any media source
|
|
5
5
|
Author-email: LUIS NOVO <lfnovo@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: ai-prompter>=0.2.3
|
|
8
9
|
Requires-Dist: aiohttp>=3.11
|
|
9
10
|
Requires-Dist: bs4>=0.0.2
|
|
10
11
|
Requires-Dist: dicttoxml>=1.7.16
|
|
@@ -25,6 +26,11 @@ Requires-Dist: python-magic>=0.4.27
|
|
|
25
26
|
Requires-Dist: python-pptx>=1.0.2
|
|
26
27
|
Requires-Dist: validators>=0.34.0
|
|
27
28
|
Requires-Dist: youtube-transcript-api>=1.0.3
|
|
29
|
+
Provides-Extra: docling
|
|
30
|
+
Requires-Dist: asciidoc; extra == 'docling'
|
|
31
|
+
Requires-Dist: docling[ocr]; extra == 'docling'
|
|
32
|
+
Requires-Dist: pandas; extra == 'docling'
|
|
33
|
+
Requires-Dist: pillow; extra == 'docling'
|
|
28
34
|
Description-Content-Type: text/markdown
|
|
29
35
|
|
|
30
36
|
# Content Core
|
|
@@ -54,8 +60,10 @@ The primary goal of Content Core is to simplify the process of ingesting content
|
|
|
54
60
|
Install Content Core using `pip`:
|
|
55
61
|
|
|
56
62
|
```bash
|
|
57
|
-
# Install the package
|
|
63
|
+
# Install the package (without Docling)
|
|
58
64
|
pip install content-core
|
|
65
|
+
# Install with Docling support
|
|
66
|
+
pip install content-core[docling]
|
|
59
67
|
```
|
|
60
68
|
|
|
61
69
|
Alternatively, if you’re developing locally:
|
|
@@ -224,12 +232,58 @@ async def main():
|
|
|
224
232
|
md_data = await extract_content({"file_path": "path/to/your/document.md"})
|
|
225
233
|
print(md_data)
|
|
226
234
|
|
|
235
|
+
# Per-execution override with Docling
|
|
236
|
+
doc_data = await extract_content({
|
|
237
|
+
"file_path": "path/to/your/document.pdf",
|
|
238
|
+
"engine": "docling",
|
|
239
|
+
"output_format": "html"
|
|
240
|
+
})
|
|
241
|
+
print(doc_data)
|
|
242
|
+
|
|
227
243
|
if __name__ == "__main__":
|
|
228
244
|
asyncio.run(main())
|
|
229
245
|
```
|
|
230
246
|
|
|
231
247
|
(See `src/content_core/notebooks/run.ipynb` for more detailed examples.)
|
|
232
248
|
|
|
249
|
+
## Docling Integration
|
|
250
|
+
|
|
251
|
+
Content Core supports an optional Docling-based extraction engine for rich document formats (PDF, DOCX, PPTX, XLSX, Markdown, AsciiDoc, HTML, CSV, Images).
|
|
252
|
+
|
|
253
|
+
### Installation
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
# Install with Docling support
|
|
257
|
+
pip install content-core[docling]
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
### Enabling Docling
|
|
261
|
+
|
|
262
|
+
#### Via configuration file
|
|
263
|
+
|
|
264
|
+
In your `cc_config.yaml` or custom config, set:
|
|
265
|
+
```yaml
|
|
266
|
+
extraction:
|
|
267
|
+
engine: docling # 'legacy' (default) or 'docling'
|
|
268
|
+
docling:
|
|
269
|
+
output_format: markdown # markdown | html | json
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
#### Programmatically in Python
|
|
273
|
+
|
|
274
|
+
```python
|
|
275
|
+
from content_core.config import set_extraction_engine, set_docling_output_format
|
|
276
|
+
|
|
277
|
+
# switch engine to Docling
|
|
278
|
+
set_extraction_engine("docling")
|
|
279
|
+
|
|
280
|
+
# choose output format: 'markdown', 'html', or 'json'
|
|
281
|
+
set_docling_output_format("html")
|
|
282
|
+
|
|
283
|
+
# now use ccore.extract or ccore.ccore
|
|
284
|
+
result = await cc.extract("document.pdf")
|
|
285
|
+
```
|
|
286
|
+
|
|
233
287
|
## Configuration
|
|
234
288
|
|
|
235
289
|
Configuration settings (like API keys for external services, logging levels) can be managed through environment variables or `.env` files, loaded automatically via `python-dotenv`.
|
|
@@ -1,24 +1,26 @@
|
|
|
1
|
-
content_core/__init__.py,sha256=
|
|
2
|
-
content_core/
|
|
1
|
+
content_core/__init__.py,sha256=ANKeslNXOGumwrkjqgRik23e5PdGps2C0FSup8_XH2Y,6515
|
|
2
|
+
content_core/cc_config.yaml,sha256=w66fo5ut6TPaU3o4hkjnroqg2hkr8YuOG3BRtI50j1s,701
|
|
3
|
+
content_core/config.py,sha256=-aUsTB6Z3fa_XIWdHNXhMgWkVLWjEW1kfyQXXB_-j54,1632
|
|
3
4
|
content_core/logging.py,sha256=oeRdWKknEolptopxF1IvnEGEc0ZUw45QXYUEZ71GcdY,438
|
|
4
5
|
content_core/models.py,sha256=FBV_tV6cmI0F82WfcA6xHag-YMsxI1dIbDGWG-3Eq_Y,935
|
|
5
6
|
content_core/models_config.yaml,sha256=Yr-GS94ffxnkaWojUfpErUMM7m_MShsYjR6QuDjMzwo,444
|
|
6
|
-
content_core/prompter.py,sha256=-ShuSyHvK50xlgsAFfA9AnAJV-LlzWwmbPDq2wUZRcI,5793
|
|
7
7
|
content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
|
|
8
|
-
content_core/templated_message.py,sha256=
|
|
8
|
+
content_core/templated_message.py,sha256=KbI2rcvgGM5oRIcsG68zAZfgNsC97fR16D61683ZSnY,1617
|
|
9
9
|
content_core/common/__init__.py,sha256=SjDp-0QRjX9PMubyTjv77_GrUqm6eC4gBuXr593JVK4,525
|
|
10
10
|
content_core/common/exceptions.py,sha256=NpYedVbckIq4kP2wek7bicMVgGGn0fkhCvid5cIxfy4,1304
|
|
11
|
-
content_core/common/state.py,sha256=
|
|
11
|
+
content_core/common/state.py,sha256=cJvIwqvrvGxuk1t51bTOvPV-RM5Nbd8F8C4o0dawIXo,1185
|
|
12
12
|
content_core/common/utils.py,sha256=0o4jovPEw_6wu7EcPPbDNZskbhhfLUBJBvRmp0Yc4R4,1182
|
|
13
13
|
content_core/content/__init__.py,sha256=ymocLXXwWnnhQFHCB3jXanNvJ2m27TVs1yO8EhCrefU,171
|
|
14
14
|
content_core/content/cleanup/__init__.py,sha256=wymD24WLDDdsZrv-5WhparSiHBK9SJCcqBHmokuZqk4,121
|
|
15
15
|
content_core/content/cleanup/core.py,sha256=AXUGUWxGob8si5uKRnDrreOcHV_gbGJr4YnRsNm2GX0,531
|
|
16
16
|
content_core/content/extraction/__init__.py,sha256=TaYw6CAcG62GZfsJxeZ6VJDLP85BU2a7_G271v6WWPk,446
|
|
17
|
-
content_core/content/extraction/graph.py,sha256=
|
|
17
|
+
content_core/content/extraction/graph.py,sha256=Sp9XJ6AoLXA_FUFWhmfTMzOC2gkarp1Qg8MsIScLCok,6213
|
|
18
18
|
content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
|
|
19
19
|
content_core/content/summary/core.py,sha256=LejUbPxnRD0sbO6MupiIb-IHLxEUGU5beBZwmIiBncc,542
|
|
20
|
-
content_core/notebooks/
|
|
20
|
+
content_core/notebooks/docling.ipynb,sha256=aTad8NORNd-TUMlbX58DURJ4-QCeplTeTT0vUj301m0,631
|
|
21
|
+
content_core/notebooks/run.ipynb,sha256=lV8n1fx_kgIQHBnk1vR6ChBjMS5luAEuDDljsTBNjrQ,369490
|
|
21
22
|
content_core/processors/audio.py,sha256=jDn0_6F5dLcmz_C-iR80uOqOIAz49ELya2R5JeM15vo,3538
|
|
23
|
+
content_core/processors/docling.py,sha256=wQ8ThAcyrCy-c95QtgplQ9UZtjCZTddLD9y1_CrRtSQ,2111
|
|
22
24
|
content_core/processors/office.py,sha256=DXkfmjqUhmhP6rJaO5Z5Y9sv-iK0zaPZ3waynFIPtsk,12153
|
|
23
25
|
content_core/processors/pdf.py,sha256=9jf-eROAqw6yQwdlbsxPXsaJXY26hVG7nSTPH9n4afY,5301
|
|
24
26
|
content_core/processors/text.py,sha256=kKHA60-NYjLmCTYUnk8TdJxQQ0Shkg-K61Ezqaelz7k,1158
|
|
@@ -31,8 +33,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
|
|
|
31
33
|
content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
|
|
32
34
|
content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
|
|
33
35
|
content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
|
|
34
|
-
content_core-0.
|
|
35
|
-
content_core-0.
|
|
36
|
-
content_core-0.
|
|
37
|
-
content_core-0.
|
|
38
|
-
content_core-0.
|
|
36
|
+
content_core-0.5.1.dist-info/METADATA,sha256=mkvdVcLsiBDGiobgswCVQF8Xkceq5VpIRZspniB61PY,10533
|
|
37
|
+
content_core-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
38
|
+
content_core-0.5.1.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
|
|
39
|
+
content_core-0.5.1.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
|
|
40
|
+
content_core-0.5.1.dist-info/RECORD,,
|
content_core/prompter.py
DELETED
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
A prompt management module using Jinja to generate complex prompts with simple templates.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import os
|
|
6
|
-
from dataclasses import dataclass
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from typing import Any, Dict, Optional, Union
|
|
9
|
-
|
|
10
|
-
from dotenv import load_dotenv
|
|
11
|
-
from jinja2 import Environment, FileSystemLoader, Template
|
|
12
|
-
from langchain_core.prompts import ChatPromptTemplate
|
|
13
|
-
from pydantic import BaseModel
|
|
14
|
-
|
|
15
|
-
from content_core.logging import logger
|
|
16
|
-
|
|
17
|
-
# Load .env values before PROMPT_PATH is read from the environment below.
load_dotenv()

# Default prompts live in a "prompts" folder next to this module.
prompt_path_default = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "prompts"
)
# Optional override folder, taken from the PROMPT_PATH environment variable.
prompt_path_custom = os.getenv("PROMPT_PATH")

logger.debug(
    f"Pasta de prompts personalizada: {prompt_path_custom if prompt_path_custom else 'Não definida'}"
)
logger.debug(f"Pasta de prompts padrão: {prompt_path_default}")

# A custom Jinja environment exists only when PROMPT_PATH is set and the
# folder it names is present on disk; otherwise env_custom stays None.
if prompt_path_custom and os.path.exists(prompt_path_custom):
    env_custom = Environment(loader=FileSystemLoader(prompt_path_custom))
else:
    env_custom = None
env_default = Environment(loader=FileSystemLoader(prompt_path_default))
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
@dataclass
class Prompter:
    """
    Manage and render Jinja prompt templates.

    A prompter is built either from a template name, resolved against the
    custom and default prompt folders, or from raw prompt text.

    Attributes:
        prompt_template (str, optional): Template file name without the
            ``.jinja`` suffix.
        prompt_variation (str, optional): Variation label, ``"default"``
            unless changed; no method of this class reads it.
        prompt_text (str, optional): Raw prompt text, used when no template
            name is given.
        template (Union[str, Template], optional): The Jinja2 template
            produced by ``setup``.
        parser (Any, optional): Object whose ``get_format_instructions()``
            result is passed to the template as ``format_instructions``.
    """

    prompt_template: Optional[str] = None
    prompt_variation: Optional[str] = "default"
    prompt_text: Optional[str] = None
    template: Optional[Union[str, Template]] = None
    parser: Optional[Any] = None

    def __init__(self, prompt_template=None, prompt_text=None, parser=None):
        """
        Initialize the Prompter with either a template file or raw text.

        Args:
            prompt_template (str, optional): The name of the prompt template file.
            prompt_text (str, optional): The raw prompt text.
            parser (Any, optional): Provider of format instructions for ``render``.

        Raises:
            ValueError: If neither argument is given, or the named template
                cannot be loaded (raised by ``setup``).
        """
        self.prompt_template = prompt_template
        self.prompt_text = prompt_text
        self.parser = parser
        self.setup()

    def setup(self):
        """
        Set up the Jinja2 template based on the provided template file or text.

        A template name takes precedence over raw text. Named templates are
        looked up in the custom folder first (when one is configured) and
        then in the default folder.

        Raises:
            ValueError: If neither prompt_template nor prompt_text is provided,
                or if the named template cannot be loaded from the default folder.
        """
        if self.prompt_template:
            # Try the custom folder first, when one is available
            if env_custom:
                try:
                    self.template = env_custom.get_template(
                        f"{self.prompt_template}.jinja"
                    )
                    logger.debug(
                        f"Template {self.prompt_template} carregado da pasta personalizada"
                    )
                    return
                except Exception as e:
                    # Best effort: a miss here falls through to the default folder
                    logger.debug(
                        f"Template {self.prompt_template} não encontrado na pasta personalizada: {e}"
                    )

            # Not found in the custom folder (or no custom folder): try the default
            try:
                self.template = env_default.get_template(
                    f"{self.prompt_template}.jinja"
                )
                logger.debug(
                    f"Template {self.prompt_template} carregado da pasta padrão"
                )
            except Exception as e:
                # Keep the original failure attached as the cause
                raise ValueError(
                    f"Template {self.prompt_template} não encontrado na pasta padrão: {e}"
                ) from e
        elif self.prompt_text:
            self.template = Template(self.prompt_text)
        else:
            raise ValueError("Prompter must have a prompt_template or prompt_text")

    def to_langchain(self):
        """
        Build a LangChain ``ChatPromptTemplate`` from this prompt's raw text.

        Returns:
            The result of ``ChatPromptTemplate.from_template`` called with the
            raw template text and ``template_format="jinja2"``.
        """
        if isinstance(self.template, str):
            template_text = self.template
        elif not self.prompt_template:
            # Built from raw text: there is no template file to read back
            template_text = self.prompt_text
        else:
            # For file-based templates, read the raw content
            # NOTE(review): this path is relative to the working directory,
            # not to the folders used by setup() -- confirm this is intended.
            template_path = os.path.join("prompts", f"{self.prompt_template}.jinja")
            with open(template_path, "r") as f:
                template_text = f.read()
        return ChatPromptTemplate.from_template(template_text, template_format="jinja2")

    @classmethod
    def from_text(cls, text: str):
        """
        Create a Prompter instance from raw text, which can contain Jinja code.

        Args:
            text (str): The raw prompt text.

        Returns:
            Prompter: A new Prompter instance.
        """

        return cls(prompt_text=text)

    def render(self, data: Optional[Union[Dict, BaseModel]] = None) -> str:
        """
        Render the prompt template with the given data.

        The template also receives ``current_time`` (local time formatted as
        ``%Y-%m-%d %H:%M:%S``) and, when a parser is set,
        ``format_instructions``.

        Args:
            data (Union[Dict, BaseModel], optional): The data to be used in
                rendering the template. Can be either a dictionary or a
                Pydantic BaseModel; omitted or ``None`` means no data.

        Returns:
            str: The rendered prompt text.

        Raises:
            AssertionError: If the template is not defined or not a Jinja2 Template.
        """
        if data is None:
            data = {}
        # Convert Pydantic model to dict if necessary
        data_dict = data.model_dump() if isinstance(data, BaseModel) else data
        # Copy so that the caller's mapping is never modified
        render_data = dict(data_dict)
        render_data["current_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        if self.parser:
            render_data["format_instructions"] = self.parser.get_format_instructions()
        assert self.template, "Prompter template is not defined"
        assert isinstance(
            self.template, Template
        ), "Prompter template is not a Jinja2 Template"
        return self.template.render(render_data)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|