content-core 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of content-core might be problematic. Click here for more details.
- content_core/notebooks/run.ipynb +18 -179
- content_core/prompter.py +51 -9
- content_core/prompts/content/cleanup.jinja +16 -0
- content_core/prompts/content/summarize.jinja +25 -0
- {content_core-0.1.1.dist-info → content_core-0.2.0.dist-info}/METADATA +17 -3
- {content_core-0.1.1.dist-info → content_core-0.2.0.dist-info}/RECORD +9 -7
- {content_core-0.1.1.dist-info → content_core-0.2.0.dist-info}/WHEEL +0 -0
- {content_core-0.1.1.dist-info → content_core-0.2.0.dist-info}/entry_points.txt +0 -0
- {content_core-0.1.1.dist-info → content_core-0.2.0.dist-info}/licenses/LICENSE +0 -0
content_core/prompter.py
CHANGED
|
@@ -10,13 +10,27 @@ from typing import Any, Dict, Optional, Union
|
|
|
10
10
|
from dotenv import load_dotenv
|
|
11
11
|
from jinja2 import Environment, FileSystemLoader, Template
|
|
12
12
|
from langchain_core.prompts import ChatPromptTemplate
|
|
13
|
+
from loguru import logger
|
|
13
14
|
from pydantic import BaseModel
|
|
14
15
|
|
|
15
16
|
load_dotenv()
|
|
16
17
|
|
|
17
|
-
|
|
18
|
+
prompt_path_default = os.path.join(
|
|
19
|
+
os.path.dirname(os.path.abspath(__file__)), "prompts"
|
|
20
|
+
)
|
|
21
|
+
prompt_path_custom = os.getenv("PROMPT_PATH")
|
|
18
22
|
|
|
19
|
-
|
|
23
|
+
logger.debug(
|
|
24
|
+
f"Pasta de prompts personalizada: {prompt_path_custom if prompt_path_custom else 'Não definida'}"
|
|
25
|
+
)
|
|
26
|
+
logger.debug(f"Pasta de prompts padrão: {prompt_path_default}")
|
|
27
|
+
|
|
28
|
+
env_custom = (
|
|
29
|
+
Environment(loader=FileSystemLoader(prompt_path_custom))
|
|
30
|
+
if prompt_path_custom and os.path.exists(prompt_path_custom)
|
|
31
|
+
else None
|
|
32
|
+
)
|
|
33
|
+
env_default = Environment(loader=FileSystemLoader(prompt_path_default))
|
|
20
34
|
|
|
21
35
|
|
|
22
36
|
@dataclass
|
|
@@ -57,7 +71,33 @@ class Prompter:
|
|
|
57
71
|
ValueError: If neither prompt_template nor prompt_text is provided.
|
|
58
72
|
"""
|
|
59
73
|
if self.prompt_template:
|
|
60
|
-
|
|
74
|
+
# Primeiro tenta carregar da pasta personalizada, se disponível
|
|
75
|
+
if env_custom:
|
|
76
|
+
try:
|
|
77
|
+
self.template = env_custom.get_template(
|
|
78
|
+
f"{self.prompt_template}.jinja"
|
|
79
|
+
)
|
|
80
|
+
logger.debug(
|
|
81
|
+
f"Template {self.prompt_template} carregado da pasta personalizada"
|
|
82
|
+
)
|
|
83
|
+
return
|
|
84
|
+
except Exception as e:
|
|
85
|
+
logger.debug(
|
|
86
|
+
f"Template {self.prompt_template} não encontrado na pasta personalizada: {e}"
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# Se não encontrou na personalizada ou não há pasta personalizada, tenta a padrão
|
|
90
|
+
try:
|
|
91
|
+
self.template = env_default.get_template(
|
|
92
|
+
f"{self.prompt_template}.jinja"
|
|
93
|
+
)
|
|
94
|
+
logger.debug(
|
|
95
|
+
f"Template {self.prompt_template} carregado da pasta padrão"
|
|
96
|
+
)
|
|
97
|
+
except Exception as e:
|
|
98
|
+
raise ValueError(
|
|
99
|
+
f"Template {self.prompt_template} não encontrado na pasta padrão: {e}"
|
|
100
|
+
)
|
|
61
101
|
elif self.prompt_text:
|
|
62
102
|
self.template = Template(self.prompt_text)
|
|
63
103
|
else:
|
|
@@ -105,11 +145,13 @@ class Prompter:
|
|
|
105
145
|
"""
|
|
106
146
|
# Convert Pydantic model to dict if necessary
|
|
107
147
|
data_dict = data.model_dump() if isinstance(data, BaseModel) else data
|
|
108
|
-
|
|
148
|
+
# Create a new mutable dictionary with the original data
|
|
149
|
+
render_data = dict(data_dict)
|
|
150
|
+
render_data["current_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
109
151
|
if self.parser:
|
|
110
|
-
|
|
152
|
+
render_data["format_instructions"] = self.parser.get_format_instructions()
|
|
111
153
|
assert self.template, "Prompter template is not defined"
|
|
112
|
-
assert isinstance(
|
|
113
|
-
|
|
114
|
-
)
|
|
115
|
-
return self.template.render(
|
|
154
|
+
assert isinstance(
|
|
155
|
+
self.template, Template
|
|
156
|
+
), "Prompter template is not a Jinja2 Template"
|
|
157
|
+
return self.template.render(render_data)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# GOAL
|
|
2
|
+
|
|
3
|
+
Adjust the content below to make it clean and readable:
|
|
4
|
+
Remove repeated strings that do not add value to the text.
|
|
5
|
+
|
|
6
|
+
Remove any content unrelated to the text itself (e.g., metadata, artifacts, or extraction errors).
|
|
7
|
+
|
|
8
|
+
Format the output as unstructured but clear text.
|
|
9
|
+
|
|
10
|
+
Do not add extra text, introductions, conclusions, or commentary—only rewrite the provided content as it is.
|
|
11
|
+
|
|
12
|
+
Do not interpret, analyze, or alter the meaning, intent, or narrative of the text—just reformat it for clarity and readability.
|
|
13
|
+
|
|
14
|
+
Do not change the text structure, do not write conclusions about it. Your only job is to make it readable.
|
|
15
|
+
|
|
16
|
+
Keep the text in its original language, regardless of what it is.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
You are an AI assistant for a personal study platform.
|
|
2
|
+
|
|
3
|
+
In this platform, your user collects various articles and content from the Internet for reference and study.
|
|
4
|
+
|
|
5
|
+
Your role is to summarize the selected content as densely as possible, helping the reader extract maximum value from it without reading the full text.
|
|
6
|
+
Focus solely on the content's value, avoiding unnecessary comments or messages.
|
|
7
|
+
|
|
8
|
+
The summary should be dense, rich in characters, and designed to create a powerful vector representation.
|
|
9
|
+
If the user provided additional context, follow its instructions. Otherwise, summarize the whole content.
|
|
10
|
+
|
|
11
|
+
Do not return any acknowledgments or greetings—only the summary.
|
|
12
|
+
|
|
13
|
+
CONTENT:
|
|
14
|
+
|
|
15
|
+
{{ content }}
|
|
16
|
+
|
|
17
|
+
{% if context %}
|
|
18
|
+
CONTEXT:
|
|
19
|
+
|
|
20
|
+
The user has provided the following additional context for your task:
|
|
21
|
+
{{context}}
|
|
22
|
+
{% endif%}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
SUMMARY:
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: content-core
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Extract what matters from any media source
|
|
5
5
|
Author-email: LUIS NOVO <lfnovo@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Requires-Python: >=3.10
|
|
8
|
-
Requires-Dist: aiohttp>=3.11
|
|
8
|
+
Requires-Dist: aiohttp>=3.11
|
|
9
9
|
Requires-Dist: bs4>=0.0.2
|
|
10
10
|
Requires-Dist: dicttoxml>=1.7.16
|
|
11
11
|
Requires-Dist: esperanto>=1.2.0
|
|
@@ -43,7 +43,7 @@ The primary goal of Content Core is to simplify the process of ingesting content
|
|
|
43
43
|
* Direct text strings.
|
|
44
44
|
* Web URLs (using robust extraction methods).
|
|
45
45
|
* Local files (including automatic transcription for video/audio files and parsing for text-based formats).
|
|
46
|
-
* **Intelligent Processing:** Applies appropriate extraction techniques based on the source type.
|
|
46
|
+
* **Intelligent Processing:** Applies appropriate extraction techniques based on the source type. See the [Processors Documentation](./docs/processors.md) for detailed information on how different content types are handled.
|
|
47
47
|
* **Content Cleaning (Optional):** Likely integrates with LLMs (via `prompter.py` and Jinja templates) to refine and clean the extracted content.
|
|
48
48
|
* **Asynchronous:** Built with `asyncio` for efficient I/O operations.
|
|
49
49
|
|
|
@@ -220,6 +220,20 @@ OPENAI_API_KEY=your-key-here
|
|
|
220
220
|
GOOGLE_API_KEY=your-key-here
|
|
221
221
|
```
|
|
222
222
|
|
|
223
|
+
### Custom Prompt Templates
|
|
224
|
+
|
|
225
|
+
Content Core allows you to define custom prompt templates for content processing. By default, the library uses built-in prompts located in the `prompts` directory. However, you can create your own prompt templates and store them in a dedicated directory. To specify the location of your custom prompts, set the `PROMPT_PATH` environment variable in your `.env` file or system environment.
|
|
226
|
+
|
|
227
|
+
Example `.env` with custom prompt path:
|
|
228
|
+
|
|
229
|
+
```plaintext
|
|
230
|
+
OPENAI_API_KEY=your-key-here
|
|
231
|
+
GOOGLE_API_KEY=your-key-here
|
|
232
|
+
PROMPT_PATH=/path/to/your/custom/prompts
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
When a prompt template is requested, Content Core will first look in the custom directory specified by `PROMPT_PATH` (if the variable is set and the directory exists). If the template is not found there, it will fall back to the default built-in prompts. This allows you to override specific prompts while still using the default ones for others.
|
|
236
|
+
|
|
223
237
|
## Development
|
|
224
238
|
|
|
225
239
|
To set up a development environment:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
content_core/__init__.py,sha256=CAPVVm3mDl8dH3j7Pn7t7UJqdwwpNjy77SzF7acssFw,6352
|
|
2
2
|
content_core/config.py,sha256=5nFWb-g7DG__OuxSvwK4yCFEC1YCKdE6-rX5Z7c6JSo,794
|
|
3
|
-
content_core/prompter.py,sha256=
|
|
3
|
+
content_core/prompter.py,sha256=oXDBww_V-_NR1rQvpEpZwf6NNBlsAMk-hj6yMdkKXRk,5729
|
|
4
4
|
content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
|
|
5
5
|
content_core/templated_message.py,sha256=NSttaX1jL5LYIDlJnabx7baDuySIFIPjFjAX5NZt9pM,1704
|
|
6
6
|
content_core/common/__init__.py,sha256=SjDp-0QRjX9PMubyTjv77_GrUqm6eC4gBuXr593JVK4,525
|
|
@@ -14,7 +14,7 @@ content_core/content/extraction/__init__.py,sha256=TaYw6CAcG62GZfsJxeZ6VJDLP85BU
|
|
|
14
14
|
content_core/content/extraction/graph.py,sha256=qrTEl9YDUJJJg7TbBqPjueSvV9oo4_WwAJ-VpWKOYec,4621
|
|
15
15
|
content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
|
|
16
16
|
content_core/content/summary/core.py,sha256=BTzTYhB-5sQmvvDaxnnWwBYcVinMHcnG-fBApcf2cyg,517
|
|
17
|
-
content_core/notebooks/run.ipynb,sha256=
|
|
17
|
+
content_core/notebooks/run.ipynb,sha256=s4mIIiYdMfTlutaVlsYjFGwwvVnoVF83UbGT9rgizCA,340220
|
|
18
18
|
content_core/processors/audio.py,sha256=ox-ScigfbBrN9B4MCvdgbYn2d3GBYqf6His0HPrzXDs,3459
|
|
19
19
|
content_core/processors/office.py,sha256=13qNAfqqLwXUT6HNmF8OnxjbfvkhnTRCPoVUctw4w1k,12139
|
|
20
20
|
content_core/processors/pdf.py,sha256=yndt8EGvV5_IxcFbFp4lb4g9T84w6cJ4LPdiTduO7aM,5296
|
|
@@ -22,12 +22,14 @@ content_core/processors/text.py,sha256=MiXNILDKcLO5sWTzp-LNJ8yC764_gmUEvDB7GUd7W
|
|
|
22
22
|
content_core/processors/url.py,sha256=LcDB_FcmJMcwqM75DX2ERXcOry98e63WEjwd6l8u3ho,6268
|
|
23
23
|
content_core/processors/video.py,sha256=aEe3M_POENPwI1tK4mUNxSeGewsmjex7lvMmELdcQeo,5183
|
|
24
24
|
content_core/processors/youtube.py,sha256=3DioyLZT-wHmLcJ-vnONjMdZ1qWpeuHhQtsZw2nIE5M,5784
|
|
25
|
+
content_core/prompts/content/cleanup.jinja,sha256=elyjbm9O_AeOcxkG-kui5wjBIRiOQCicjm92I4NmoVA,693
|
|
26
|
+
content_core/prompts/content/summarize.jinja,sha256=zLPbomfjA-tQZr-c_rOqvKhd55R8NN3Q2gLyLR1sKso,817
|
|
25
27
|
content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8jQ,231
|
|
26
28
|
content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
|
|
27
29
|
content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
|
|
28
30
|
content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
|
|
29
|
-
content_core-0.
|
|
30
|
-
content_core-0.
|
|
31
|
-
content_core-0.
|
|
32
|
-
content_core-0.
|
|
33
|
-
content_core-0.
|
|
31
|
+
content_core-0.2.0.dist-info/METADATA,sha256=fXtKo9M6oBAxKPDavDDtMRPyptAqgcK8Qi_PKR1xMTo,8390
|
|
32
|
+
content_core-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
33
|
+
content_core-0.2.0.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
|
|
34
|
+
content_core-0.2.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
|
|
35
|
+
content_core-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|