content-core 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of content-core has been flagged as potentially problematic. Review the changes below before upgrading; consult the package registry's advisory page for further details.

@@ -1,13 +1,13 @@
1
1
  import asyncio
2
2
  import os
3
+ import tempfile
4
+ import math
5
+ import traceback
3
6
  from functools import partial
4
- from math import ceil
5
-
6
- from pydub import AudioSegment
7
+ from moviepy import AudioFileClip
7
8
 
8
9
  from content_core.common import ProcessSourceState
9
10
  from content_core.logging import logger
10
- from content_core.models import ModelFactory
11
11
 
12
12
  # todo: remove reference to model_manager
13
13
  # future: parallelize the transcription process
@@ -29,31 +29,29 @@ async def split_audio(input_file, segment_length_minutes=15, output_prefix=None)
29
29
  output_prefix = os.path.splitext(os.path.basename(input_file_abs))[0]
30
30
 
31
31
  # Load the audio file
32
- audio = AudioSegment.from_file(input_file_abs)
32
+ audio = AudioFileClip(input_file_abs)
33
33
 
34
- # Calculate segment length in milliseconds
35
- segment_length_ms = segment_length_minutes * 60 * 1000
34
+ # Calculate segment length in seconds
35
+ segment_length_s = segment_length_minutes * 60
36
36
 
37
37
  # Calculate number of segments
38
- total_segments = ceil(len(audio) / segment_length_ms)
38
+ total_segments = math.ceil(audio.duration / segment_length_s)
39
39
  logger.debug(f"Splitting file: {input_file_abs} into {total_segments} segments")
40
40
 
41
41
  output_files = []
42
42
 
43
43
  # Split the audio into segments
44
44
  for i in range(total_segments):
45
- start_time = i * segment_length_ms
46
- end_time = min((i + 1) * segment_length_ms, len(audio))
45
+ start_time = i * segment_length_s
46
+ end_time = min((i + 1) * segment_length_s, audio.duration)
47
47
 
48
48
  # Extract segment
49
- segment = audio[start_time:end_time]
50
-
51
- # Generate output filename
52
49
  output_filename = f"{output_prefix}_{str(i+1).zfill(3)}.mp3"
53
50
  output_path = os.path.join(output_dir, output_filename)
54
51
 
55
52
  # Export segment
56
- segment.export(output_path, format="mp3")
53
+ extract_audio(input_file_abs, output_path, start_time, end_time)
54
+
57
55
  output_files.append(output_path)
58
56
 
59
57
  logger.debug(f"Exported segment {i+1}/{total_segments}: {output_filename}")
@@ -66,43 +64,87 @@ async def split_audio(input_file, segment_length_minutes=15, output_prefix=None)
66
64
  )
67
65
 
68
66
 
67
+ def extract_audio(input_file: str, output_file: str, start_time: float = None, end_time: float = None) -> None:
68
+ """
69
+ Extract audio from a video or audio file and save it as an MP3 file.
70
+ If start_time and end_time are provided, only that segment of audio is extracted.
71
+
72
+ Args:
73
+ input_file (str): Path to the input video or audio file.
74
+ output_file (str): Path where the output MP3 file will be saved.
75
+ start_time (float, optional): Start time of the audio segment in seconds. Defaults to None.
76
+ end_time (float, optional): End time of the audio segment in seconds. Defaults to None.
77
+ """
78
+ try:
79
+ # Load the file as an AudioFileClip
80
+ audio_clip = AudioFileClip(input_file)
81
+
82
+ # If start_time and end_time are provided, trim the audio
83
+ if start_time is not None and end_time is not None:
84
+ audio_clip = audio_clip.cutout(0, start_time).cutout(end_time - start_time, audio_clip.duration)
85
+ elif start_time is not None:
86
+ audio_clip = audio_clip.cutout(0, start_time)
87
+ elif end_time is not None:
88
+ audio_clip = audio_clip.cutout(end_time, audio_clip.duration)
89
+
90
+ # Export the audio as MP3
91
+ audio_clip.write_audiofile(output_file, codec='mp3')
92
+ audio_clip.close()
93
+ except Exception as e:
94
+ logger.error(f"Error extracting audio: {str(e)}")
95
+ raise
96
+
97
+
69
98
  async def transcribe_audio_segment(audio_file, model):
70
99
  """Transcribe a single audio segment asynchronously"""
71
100
  return (await model.atranscribe(audio_file)).text
72
101
 
73
102
 
74
- async def extract_audio(data: ProcessSourceState):
103
+ async def extract_audio_data(data: ProcessSourceState):
75
104
  input_audio_path = data.file_path
76
- audio_files = []
77
105
 
78
106
  try:
79
- # Split audio into segments
80
- audio_files = await split_audio(input_audio_path)
107
+ # Create a temporary directory for audio segments
108
+ temp_dir = tempfile.mkdtemp()
109
+ output_prefix = os.path.splitext(os.path.basename(input_audio_path))[0]
110
+ output_dir = temp_dir
111
+ os.makedirs(output_dir, exist_ok=True)
81
112
 
82
- # Transcribe all segments concurrently
83
- speech_to_text_model = ModelFactory.get_model("speech_to_text")
84
- transcribe_tasks = [
85
- transcribe_audio_segment(audio_file, speech_to_text_model)
86
- for audio_file in audio_files
87
- ]
88
- transcriptions = await asyncio.gather(*transcribe_tasks)
113
+ # Split audio into segments if longer than 10 minutes
114
+ audio = AudioFileClip(input_audio_path)
115
+ duration_s = audio.duration
116
+ segment_length_s = 10 * 60 # 10 minutes in seconds
117
+ output_files = []
118
+
119
+ if duration_s > segment_length_s:
120
+ logger.info(f"Audio is longer than 10 minutes ({duration_s}s), splitting into {math.ceil(duration_s / segment_length_s)} segments")
121
+ for i in range(math.ceil(duration_s / segment_length_s)):
122
+ start_time = i * segment_length_s
123
+ end_time = min((i + 1) * segment_length_s, audio.duration)
89
124
 
90
- return {"content": " ".join(transcriptions)}
125
+ # Extract segment
126
+ output_filename = f"{output_prefix}_{str(i+1).zfill(3)}.mp3"
127
+ output_path = os.path.join(output_dir, output_filename)
91
128
 
129
+ extract_audio(input_audio_path, output_path, start_time, end_time)
130
+
131
+ output_files.append(output_path)
132
+ else:
133
+ output_files = [input_audio_path]
134
+
135
+ # Transcribe audio files
136
+ from content_core.models import ModelFactory
137
+ speech_to_text_model = ModelFactory.get_model("speech_to_text")
138
+ transcriptions = []
139
+ for audio_file in output_files:
140
+ transcription = await transcribe_audio_segment(audio_file, speech_to_text_model)
141
+ transcriptions.append(transcription)
142
+
143
+ return {
144
+ "metadata": {"audio_files": output_files},
145
+ "content": " ".join(transcriptions)
146
+ }
92
147
  except Exception as e:
93
- logger.error(f"Error transcribing audio: {str(e)}")
94
- logger.exception(e)
148
+ logger.error(f"Error processing audio: {str(e)}")
149
+ logger.error(traceback.format_exc())
95
150
  raise
96
-
97
- finally:
98
- # Clean up temporary files
99
- def _cleanup(files):
100
- for file in files:
101
- try:
102
- os.remove(file)
103
- except OSError as e:
104
- logger.error(f"Error removing temporary file {file}: {str(e)}")
105
-
106
- await asyncio.get_event_loop().run_in_executor(
107
- None, partial(_cleanup, audio_files)
108
- )
@@ -1,18 +1,18 @@
1
1
  from typing import Dict, Optional, Union
2
2
 
3
+ from ai_prompter import Prompter
3
4
  from esperanto import LanguageModel
4
5
  from esperanto.common_types import Message
5
6
  from pydantic import BaseModel, Field
6
7
 
7
8
  from content_core.models import ModelFactory
8
- from content_core.prompter import Prompter
9
9
 
10
10
 
11
11
  class TemplatedMessageInput(BaseModel):
12
- system_prompt_template: Optional[str] = ""
13
- system_prompt_text: Optional[str] = ""
14
- user_prompt_template: Optional[str] = ""
15
- user_prompt_text: Optional[str] = ""
12
+ system_prompt_template: Optional[str] = None
13
+ system_prompt_text: Optional[str] = None
14
+ user_prompt_template: Optional[str] = None
15
+ user_prompt_text: Optional[str] = None
16
16
  data: Optional[Union[Dict, BaseModel]] = Field(default_factory=lambda: {})
17
17
  config: Dict = Field(
18
18
  description="The config for the LLM",
@@ -28,30 +28,22 @@ async def templated_message(
28
28
  input: TemplatedMessageInput, model: Optional[LanguageModel] = None
29
29
  ) -> str:
30
30
  if not model:
31
- model = ModelFactory.get_model('default_model')
31
+ model = ModelFactory.get_model("default_model")
32
32
 
33
33
  msgs = []
34
34
  if input.system_prompt_template or input.system_prompt_text:
35
- msgs.append(
36
- Message(
37
- role="system",
38
- content=Prompter(
39
- prompt_template=input.system_prompt_template,
40
- prompt_text=input.system_prompt_text,
41
- ).render(data=input.data),
42
- )
43
- )
35
+ system_prompt = Prompter(
36
+ prompt_template=input.system_prompt_template,
37
+ template_text=input.system_prompt_text,
38
+ ).render(data=input.data)
39
+ msgs.append(Message(role="system", content=system_prompt))
44
40
 
45
41
  if input.user_prompt_template or input.user_prompt_text:
46
- msgs.append(
47
- Message(
48
- role="user",
49
- content=Prompter(
50
- prompt_template=input.user_prompt_template,
51
- prompt_text=input.user_prompt_text,
52
- ).render(data=input.data),
53
- )
54
- )
42
+ user_prompt = Prompter(
43
+ prompt_template=input.user_prompt_template,
44
+ template_text=input.user_prompt_text,
45
+ ).render(data=input.data)
46
+ msgs.append(Message(role="user", content=user_prompt))
55
47
 
56
48
  result = await model.achat_complete(msgs)
57
49
  return result.content
@@ -1,10 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: content-core
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Extract what matters from any media source
5
5
  Author-email: LUIS NOVO <lfnovo@gmail.com>
6
6
  License-File: LICENSE
7
7
  Requires-Python: >=3.10
8
+ Requires-Dist: ai-prompter>=0.2.3
8
9
  Requires-Dist: aiohttp>=3.11
9
10
  Requires-Dist: bs4>=0.0.2
10
11
  Requires-Dist: dicttoxml>=1.7.16
@@ -14,10 +15,10 @@ Requires-Dist: jinja2>=3.1.6
14
15
  Requires-Dist: langdetect>=1.0.9
15
16
  Requires-Dist: langgraph>=0.3.29
16
17
  Requires-Dist: loguru>=0.7.3
18
+ Requires-Dist: moviepy>=2.1.2
17
19
  Requires-Dist: openai>=1.73.0
18
20
  Requires-Dist: openpyxl>=3.1.5
19
21
  Requires-Dist: pandas>=2.2.3
20
- Requires-Dist: pydub>=0.25.1
21
22
  Requires-Dist: pymupdf>=1.25.5
22
23
  Requires-Dist: python-docx>=1.1.2
23
24
  Requires-Dist: python-dotenv>=1.1.0
@@ -1,12 +1,11 @@
1
- content_core/__init__.py,sha256=sBCcvRJ-9u5htV5AdptlYPNO0R8NmAex2K1XAkJAoL0,6474
1
+ content_core/__init__.py,sha256=ANKeslNXOGumwrkjqgRik23e5PdGps2C0FSup8_XH2Y,6515
2
2
  content_core/cc_config.yaml,sha256=w66fo5ut6TPaU3o4hkjnroqg2hkr8YuOG3BRtI50j1s,701
3
3
  content_core/config.py,sha256=-aUsTB6Z3fa_XIWdHNXhMgWkVLWjEW1kfyQXXB_-j54,1632
4
4
  content_core/logging.py,sha256=oeRdWKknEolptopxF1IvnEGEc0ZUw45QXYUEZ71GcdY,438
5
5
  content_core/models.py,sha256=FBV_tV6cmI0F82WfcA6xHag-YMsxI1dIbDGWG-3Eq_Y,935
6
6
  content_core/models_config.yaml,sha256=Yr-GS94ffxnkaWojUfpErUMM7m_MShsYjR6QuDjMzwo,444
7
- content_core/prompter.py,sha256=-ShuSyHvK50xlgsAFfA9AnAJV-LlzWwmbPDq2wUZRcI,5793
8
7
  content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
9
- content_core/templated_message.py,sha256=iWz-TwWq08mspgZW3EgIGf7HqtW1tXuTDpo9FkNwixQ,1729
8
+ content_core/templated_message.py,sha256=KbI2rcvgGM5oRIcsG68zAZfgNsC97fR16D61683ZSnY,1617
10
9
  content_core/common/__init__.py,sha256=SjDp-0QRjX9PMubyTjv77_GrUqm6eC4gBuXr593JVK4,525
11
10
  content_core/common/exceptions.py,sha256=NpYedVbckIq4kP2wek7bicMVgGGn0fkhCvid5cIxfy4,1304
12
11
  content_core/common/state.py,sha256=cJvIwqvrvGxuk1t51bTOvPV-RM5Nbd8F8C4o0dawIXo,1185
@@ -15,12 +14,11 @@ content_core/content/__init__.py,sha256=ymocLXXwWnnhQFHCB3jXanNvJ2m27TVs1yO8EhCr
15
14
  content_core/content/cleanup/__init__.py,sha256=wymD24WLDDdsZrv-5WhparSiHBK9SJCcqBHmokuZqk4,121
16
15
  content_core/content/cleanup/core.py,sha256=AXUGUWxGob8si5uKRnDrreOcHV_gbGJr4YnRsNm2GX0,531
17
16
  content_core/content/extraction/__init__.py,sha256=TaYw6CAcG62GZfsJxeZ6VJDLP85BU2a7_G271v6WWPk,446
18
- content_core/content/extraction/graph.py,sha256=Sp9XJ6AoLXA_FUFWhmfTMzOC2gkarp1Qg8MsIScLCok,6213
17
+ content_core/content/extraction/graph.py,sha256=d5Hp7GS2dFpYQIHFTIFhU-7ySZ3lfipdDxZZpe2DXS8,6361
19
18
  content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
20
19
  content_core/content/summary/core.py,sha256=LejUbPxnRD0sbO6MupiIb-IHLxEUGU5beBZwmIiBncc,542
21
- content_core/notebooks/docling.ipynb,sha256=aTad8NORNd-TUMlbX58DURJ4-QCeplTeTT0vUj301m0,631
22
- content_core/notebooks/run.ipynb,sha256=vmOYratdx0MnhNChjq3I5b7K2iYWuqO2dECK4Dp0jbU,369422
23
- content_core/processors/audio.py,sha256=jDn0_6F5dLcmz_C-iR80uOqOIAz49ELya2R5JeM15vo,3538
20
+ content_core/notebooks/run.ipynb,sha256=WPBNcQUNXR5MldNMghVcU4vE4ibrVmlANa80baQn8TA,371078
21
+ content_core/processors/audio.py,sha256=KnwxK85X9qRyVziMhFd103kfHkE8qGB1D4yW5lYO90E,5701
24
22
  content_core/processors/docling.py,sha256=wQ8ThAcyrCy-c95QtgplQ9UZtjCZTddLD9y1_CrRtSQ,2111
25
23
  content_core/processors/office.py,sha256=DXkfmjqUhmhP6rJaO5Z5Y9sv-iK0zaPZ3waynFIPtsk,12153
26
24
  content_core/processors/pdf.py,sha256=9jf-eROAqw6yQwdlbsxPXsaJXY26hVG7nSTPH9n4afY,5301
@@ -28,14 +26,12 @@ content_core/processors/text.py,sha256=kKHA60-NYjLmCTYUnk8TdJxQQ0Shkg-K61Ezqaelz
28
26
  content_core/processors/url.py,sha256=yhAnvIlYKc13iZedwA0ck6h6wd2j6T-Q2NAtMen3hIs,6783
29
27
  content_core/processors/video.py,sha256=3WnZwTswvTLm8PtQhKwoqJ2BH6YZi62dMUjALwJiebo,5196
30
28
  content_core/processors/youtube.py,sha256=nM286Km7FLN0r1f-n-dRkqs6mSXxCo4YOhTeGzj7Suo,5798
31
- content_core/prompts/content/cleanup.jinja,sha256=elyjbm9O_AeOcxkG-kui5wjBIRiOQCicjm92I4NmoVA,693
32
- content_core/prompts/content/summarize.jinja,sha256=zLPbomfjA-tQZr-c_rOqvKhd55R8NN3Q2gLyLR1sKso,817
33
29
  content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8jQ,231
34
30
  content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
35
31
  content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
36
32
  content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
37
- content_core-0.5.0.dist-info/METADATA,sha256=3im9n4tqCrStAX1UkdR42NnODwwHggKeQJdYL_eX68U,10499
38
- content_core-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
39
- content_core-0.5.0.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
40
- content_core-0.5.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
41
- content_core-0.5.0.dist-info/RECORD,,
33
+ content_core-0.6.0.dist-info/METADATA,sha256=pn72ciBGpWE7tVvJ2j3NmQPmFB60cNrkHBmp5ziuyqk,10534
34
+ content_core-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
35
+ content_core-0.6.0.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
36
+ content_core-0.6.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
37
+ content_core-0.6.0.dist-info/RECORD,,
@@ -1,27 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "from docling.document_converter import DocumentConverter\n",
10
- "\n",
11
- "\n",
12
- "source = \"/Users/luisnovo/dev/projetos/content-core/tests/input_content/file.docx\"\n",
13
- "source_url = \"https://arxiv.org/pdf/2408.09869\" # PDF path or URL\n",
14
- "converter = DocumentConverter()\n",
15
- "result = converter.convert(source)\n",
16
- "print(result.document.export_to_markdown())"
17
- ]
18
- }
19
- ],
20
- "metadata": {
21
- "language_info": {
22
- "name": "python"
23
- }
24
- },
25
- "nbformat": 4,
26
- "nbformat_minor": 2
27
- }
content_core/prompter.py DELETED
@@ -1,159 +0,0 @@
1
- """
2
- A prompt management module using Jinja to generate complex prompts with simple templates.
3
- """
4
-
5
- import os
6
- from dataclasses import dataclass
7
- from datetime import datetime
8
- from typing import Any, Dict, Optional, Union
9
-
10
- from dotenv import load_dotenv
11
- from jinja2 import Environment, FileSystemLoader, Template
12
- from langchain_core.prompts import ChatPromptTemplate
13
- from pydantic import BaseModel
14
-
15
- from content_core.logging import logger
16
-
17
- load_dotenv()
18
-
19
- prompt_path_default = os.path.join(
20
- os.path.dirname(os.path.abspath(__file__)), "prompts"
21
- )
22
- prompt_path_custom = os.getenv("PROMPT_PATH")
23
-
24
- logger.debug(
25
- f"Pasta de prompts personalizada: {prompt_path_custom if prompt_path_custom else 'Não definida'}"
26
- )
27
- logger.debug(f"Pasta de prompts padrão: {prompt_path_default}")
28
-
29
- env_custom = (
30
- Environment(loader=FileSystemLoader(prompt_path_custom))
31
- if prompt_path_custom and os.path.exists(prompt_path_custom)
32
- else None
33
- )
34
- env_default = Environment(loader=FileSystemLoader(prompt_path_default))
35
-
36
-
37
- @dataclass
38
- class Prompter:
39
- """
40
- A class for managing and rendering prompt templates.
41
-
42
- Attributes:
43
- prompt_template (str, optional): The name of the prompt template file.
44
- prompt_variation (str, optional): The variation of the prompt template.
45
- prompt_text (str, optional): The raw prompt text.
46
- template (Union[str, Template], optional): The Jinja2 template object.
47
- """
48
-
49
- prompt_template: Optional[str] = None
50
- prompt_variation: Optional[str] = "default"
51
- prompt_text: Optional[str] = None
52
- template: Optional[Union[str, Template]] = None
53
- parser: Optional[Any] = None
54
-
55
- def __init__(self, prompt_template=None, prompt_text=None, parser=None):
56
- """
57
- Initialize the Prompter with either a template file or raw text.
58
-
59
- Args:
60
- prompt_template (str, optional): The name of the prompt template file.
61
- prompt_text (str, optional): The raw prompt text.
62
- """
63
- self.prompt_template = prompt_template
64
- self.prompt_text = prompt_text
65
- self.parser = parser
66
- self.setup()
67
-
68
- def setup(self):
69
- """
70
- Set up the Jinja2 template based on the provided template file or text.
71
- Raises:
72
- ValueError: If neither prompt_template nor prompt_text is provided.
73
- """
74
- if self.prompt_template:
75
- # Primeiro tenta carregar da pasta personalizada, se disponível
76
- if env_custom:
77
- try:
78
- self.template = env_custom.get_template(
79
- f"{self.prompt_template}.jinja"
80
- )
81
- logger.debug(
82
- f"Template {self.prompt_template} carregado da pasta personalizada"
83
- )
84
- return
85
- except Exception as e:
86
- logger.debug(
87
- f"Template {self.prompt_template} não encontrado na pasta personalizada: {e}"
88
- )
89
-
90
- # Se não encontrou na personalizada ou não há pasta personalizada, tenta a padrão
91
- try:
92
- self.template = env_default.get_template(
93
- f"{self.prompt_template}.jinja"
94
- )
95
- logger.debug(
96
- f"Template {self.prompt_template} carregado da pasta padrão"
97
- )
98
- except Exception as e:
99
- raise ValueError(
100
- f"Template {self.prompt_template} não encontrado na pasta padrão: {e}"
101
- )
102
- elif self.prompt_text:
103
- self.template = Template(self.prompt_text)
104
- else:
105
- raise ValueError("Prompter must have a prompt_template or prompt_text")
106
-
107
- assert self.prompt_template or self.prompt_text, "Prompt is required"
108
-
109
- def to_langchain(self):
110
- if isinstance(self.template, str):
111
- template_text = self.template
112
- else:
113
- # For file-based templates, read the raw content
114
- template_path = os.path.join("prompts", f"{self.prompt_template}.jinja")
115
- with open(template_path, "r") as f:
116
- template_text = f.read()
117
- return ChatPromptTemplate.from_template(template_text, template_format="jinja2")
118
-
119
- @classmethod
120
- def from_text(cls, text: str):
121
- """
122
- Create a Prompter instance from raw text, which can contain Jinja code.
123
-
124
- Args:
125
- text (str): The raw prompt text.
126
-
127
- Returns:
128
- Prompter: A new Prompter instance.
129
- """
130
-
131
- return cls(prompt_text=text)
132
-
133
- def render(self, data: Optional[Union[Dict, BaseModel]] = {}) -> str:
134
- """
135
- Render the prompt template with the given data.
136
-
137
- Args:
138
- data (Union[Dict, BaseModel]): The data to be used in rendering the template.
139
- Can be either a dictionary or a Pydantic BaseModel.
140
-
141
- Returns:
142
- str: The rendered prompt text.
143
-
144
- Raises:
145
- AssertionError: If the template is not defined or not a Jinja2 Template.
146
- """
147
- # Convert Pydantic model to dict if necessary
148
- data_dict = data.model_dump() if isinstance(data, BaseModel) else data
149
- # Create a new mutable dictionary with the original data
150
- render_data = dict(data_dict)
151
- render_data["current_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
152
- if self.parser:
153
- render_data["format_instructions"] = self.parser.get_format_instructions()
154
- assert self.template, "Prompter template is not defined"
155
- assert isinstance(
156
- self.template, Template
157
- ), "Prompter template is not a Jinja2 Template"
158
- return self.template.render(render_data)
159
- return self.template.render(render_data)
@@ -1,16 +0,0 @@
1
- # GOAL
2
-
3
- Adjust the content below to make it clean and readable:
4
- Remove repeated strings that do not add value to the text.
5
-
6
- Remove any content unrelated to the text itself (e.g., metadata, artifacts, or extraction errors).
7
-
8
- Format the output as unstructured but clear text.
9
-
10
- Do not add extra text, introductions, conclusions, or commentary—only rewrite the provided content as it is.
11
-
12
- Do not interpret, analyze, or alter the meaning, intent, or narrative of the text—just reformat it for clarity and readability.
13
-
14
- Do not change the text structure, do not write conclusions about it. Your only job is to make it readable.
15
-
16
- Keep the text in its original language, regardless of what it is.
@@ -1,25 +0,0 @@
1
- You are an AI assistant for a personal study platform.
2
-
3
- In this platform, your user collects various articles and content from the Internet for reference and study.
4
-
5
- Your role is to summarize the selected content as densely as possible, helping the reader extract maximum value from it without reading the full text.
6
- Focus solely on the content's value, avoiding unnecessary comments or messages.
7
-
8
- The summary should be dense, rich in characters, and designed to create a powerful vector representation.
9
- If the user provided additional context, follow its instructions. Otherwise, summary the whole content.
10
-
11
- Do not return any acknowledgments or greetings—only the summary.
12
-
13
- CONTENT:
14
-
15
- {{ content }}
16
-
17
- {% if context %}
18
- CONTEXT:
19
-
20
- User has provided the aditional context for your task:
21
- {{context}}
22
- {% endif%}
23
-
24
-
25
- SUMMARY: