PyPI - content-core - Versions diffs - 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

content-core 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of content-core might be problematic. Click here for more details.

Files changed (9)

content_core/__init__.py CHANGED Viewed

@@ -5,9 +5,12 @@ import os
 import sys
 from xml.etree import ElementTree as ET
-from dicttoxml import dicttoxml  # type: ignore
 from dotenv import load_dotenv
+load_dotenv()
+from dicttoxml import dicttoxml  # type: ignore
 from content_core.common import ProcessSourceInput
 from content_core.content.cleanup import cleanup_content
 from content_core.content.extraction import extract_content
@@ -18,7 +21,6 @@ from content_core.logging import configure_logging, logger
 extract = extract_content
 clean = cleanup_content
-load_dotenv()
 # Configure loguru logger using centralized configuration
 configure_logging(debug=False)
@@ -212,3 +214,5 @@ def csum():
 if __name__ == "__main__":
     ccore()
+if __name__ == "__main__":
+    ccore()

content_core/notebooks/run.ipynb CHANGED Viewed

@@ -305,7 +305,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -328,18 +328,18 @@
     }
    ],
    "source": [
-    "from content_core.config import set_extraction_engine, set_docling_output_format\n",
+    "# from content_core.config import set_extraction_engine, set_docling_output_format\n",
     "from content_core.content.extraction import extract_content\n",
     "\n",
-    "# 2) Turn on Docling\n",
-    "set_extraction_engine(\"docling\")\n",
+    "# # 2) Turn on Docling\n",
+    "# set_extraction_engine(\"docling\")\n",
     "\n",
-    "# 3) (Optionally) pick your format – markdown, html or json\n",
-    "set_docling_output_format(\"markdown\")\n",
+    "# # 3) (Optionally) pick your format – markdown, html or json\n",
+    "# set_docling_output_format(\"markdown\")\n",
     "\n",
     "# 4) Now extract exactly as before:\n",
     "result = await extract_content({\n",
-    "    \"file_path\": \"../../../tests/input_content/file.pdf\"\n",
+    "    \"file_path\": \"../../../tests/input_content/file.pdf\", \"engine\": \"docling\", \"output_format\": \"markdown\"\n",
     "})\n",
     "print(result.content)"
    ]

content_core/templated_message.py CHANGED Viewed

@@ -1,18 +1,18 @@
 from typing import Dict, Optional, Union
+from ai_prompter import Prompter
 from esperanto import LanguageModel
 from esperanto.common_types import Message
 from pydantic import BaseModel, Field
 from content_core.models import ModelFactory
-from content_core.prompter import Prompter
 class TemplatedMessageInput(BaseModel):
-    system_prompt_template: Optional[str] = ""
-    system_prompt_text: Optional[str] = ""
-    user_prompt_template: Optional[str] = ""
-    user_prompt_text: Optional[str] = ""
+    system_prompt_template: Optional[str] = None
+    system_prompt_text: Optional[str] = None
+    user_prompt_template: Optional[str] = None
+    user_prompt_text: Optional[str] = None
     data: Optional[Union[Dict, BaseModel]] = Field(default_factory=lambda: {})
     config: Dict = Field(
         description="The config for the LLM",
@@ -28,30 +28,22 @@ async def templated_message(
     input: TemplatedMessageInput, model: Optional[LanguageModel] = None
 ) -> str:
     if not model:
-        model = ModelFactory.get_model('default_model')
+        model = ModelFactory.get_model("default_model")
     msgs = []
     if input.system_prompt_template or input.system_prompt_text:
-        msgs.append(
-            Message(
-                role="system",
-                content=Prompter(
-                    prompt_template=input.system_prompt_template,
-                    prompt_text=input.system_prompt_text,
-                ).render(data=input.data),
-            )
-        )
+        system_prompt = Prompter(
+            prompt_template=input.system_prompt_template,
+            template_text=input.system_prompt_text,
+        ).render(data=input.data)
+        msgs.append(Message(role="system", content=system_prompt))
     if input.user_prompt_template or input.user_prompt_text:
-        msgs.append(
-            Message(
-                role="user",
-                content=Prompter(
-                    prompt_template=input.user_prompt_template,
-                    prompt_text=input.user_prompt_text,
-                ).render(data=input.data),
-            )
-        )
+        user_prompt = Prompter(
+            prompt_template=input.user_prompt_template,
+            template_text=input.user_prompt_text,
+        ).render(data=input.data)
+        msgs.append(Message(role="user", content=user_prompt))
     result = await model.achat_complete(msgs)
     return result.content

{content_core-0.5.0.dist-info → content_core-0.5.1.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,11 @@
 Metadata-Version: 2.4
 Name: content-core
-Version: 0.5.0
+Version: 0.5.1
 Summary: Extract what matters from any media source
 Author-email: LUIS NOVO <lfnovo@gmail.com>
 License-File: LICENSE
 Requires-Python: >=3.10
+Requires-Dist: ai-prompter>=0.2.3
 Requires-Dist: aiohttp>=3.11
 Requires-Dist: bs4>=0.0.2
 Requires-Dist: dicttoxml>=1.7.16

{content_core-0.5.0.dist-info → content_core-0.5.1.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,11 @@
-content_core/__init__.py,sha256=sBCcvRJ-9u5htV5AdptlYPNO0R8NmAex2K1XAkJAoL0,6474
+content_core/__init__.py,sha256=ANKeslNXOGumwrkjqgRik23e5PdGps2C0FSup8_XH2Y,6515
 content_core/cc_config.yaml,sha256=w66fo5ut6TPaU3o4hkjnroqg2hkr8YuOG3BRtI50j1s,701
 content_core/config.py,sha256=-aUsTB6Z3fa_XIWdHNXhMgWkVLWjEW1kfyQXXB_-j54,1632
 content_core/logging.py,sha256=oeRdWKknEolptopxF1IvnEGEc0ZUw45QXYUEZ71GcdY,438
 content_core/models.py,sha256=FBV_tV6cmI0F82WfcA6xHag-YMsxI1dIbDGWG-3Eq_Y,935
 content_core/models_config.yaml,sha256=Yr-GS94ffxnkaWojUfpErUMM7m_MShsYjR6QuDjMzwo,444
-content_core/prompter.py,sha256=-ShuSyHvK50xlgsAFfA9AnAJV-LlzWwmbPDq2wUZRcI,5793
 content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
-content_core/templated_message.py,sha256=iWz-TwWq08mspgZW3EgIGf7HqtW1tXuTDpo9FkNwixQ,1729
+content_core/templated_message.py,sha256=KbI2rcvgGM5oRIcsG68zAZfgNsC97fR16D61683ZSnY,1617
 content_core/common/__init__.py,sha256=SjDp-0QRjX9PMubyTjv77_GrUqm6eC4gBuXr593JVK4,525
 content_core/common/exceptions.py,sha256=NpYedVbckIq4kP2wek7bicMVgGGn0fkhCvid5cIxfy4,1304
 content_core/common/state.py,sha256=cJvIwqvrvGxuk1t51bTOvPV-RM5Nbd8F8C4o0dawIXo,1185
@@ -19,7 +18,7 @@ content_core/content/extraction/graph.py,sha256=Sp9XJ6AoLXA_FUFWhmfTMzOC2gkarp1Q
 content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
 content_core/content/summary/core.py,sha256=LejUbPxnRD0sbO6MupiIb-IHLxEUGU5beBZwmIiBncc,542
 content_core/notebooks/docling.ipynb,sha256=aTad8NORNd-TUMlbX58DURJ4-QCeplTeTT0vUj301m0,631
-content_core/notebooks/run.ipynb,sha256=vmOYratdx0MnhNChjq3I5b7K2iYWuqO2dECK4Dp0jbU,369422
+content_core/notebooks/run.ipynb,sha256=lV8n1fx_kgIQHBnk1vR6ChBjMS5luAEuDDljsTBNjrQ,369490
 content_core/processors/audio.py,sha256=jDn0_6F5dLcmz_C-iR80uOqOIAz49ELya2R5JeM15vo,3538
 content_core/processors/docling.py,sha256=wQ8ThAcyrCy-c95QtgplQ9UZtjCZTddLD9y1_CrRtSQ,2111
 content_core/processors/office.py,sha256=DXkfmjqUhmhP6rJaO5Z5Y9sv-iK0zaPZ3waynFIPtsk,12153
@@ -34,8 +33,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
 content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
 content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
 content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
-content_core-0.5.0.dist-info/METADATA,sha256=3im9n4tqCrStAX1UkdR42NnODwwHggKeQJdYL_eX68U,10499
-content_core-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-content_core-0.5.0.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
-content_core-0.5.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
-content_core-0.5.0.dist-info/RECORD,,
+content_core-0.5.1.dist-info/METADATA,sha256=mkvdVcLsiBDGiobgswCVQF8Xkceq5VpIRZspniB61PY,10533
+content_core-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+content_core-0.5.1.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
+content_core-0.5.1.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
+content_core-0.5.1.dist-info/RECORD,,

content_core/prompter.py DELETED Viewed

@@ -1,159 +0,0 @@
-"""
-A prompt management module using Jinja to generate complex prompts with simple templates.
-"""
-import os
-from dataclasses import dataclass
-from datetime import datetime
-from typing import Any, Dict, Optional, Union
-from dotenv import load_dotenv
-from jinja2 import Environment, FileSystemLoader, Template
-from langchain_core.prompts import ChatPromptTemplate
-from pydantic import BaseModel
-from content_core.logging import logger
-load_dotenv()
-prompt_path_default = os.path.join(
-    os.path.dirname(os.path.abspath(__file__)), "prompts"
-)
-prompt_path_custom = os.getenv("PROMPT_PATH")
-logger.debug(
-    f"Pasta de prompts personalizada: {prompt_path_custom if prompt_path_custom else 'Não definida'}"
-)
-logger.debug(f"Pasta de prompts padrão: {prompt_path_default}")
-env_custom = (
-    Environment(loader=FileSystemLoader(prompt_path_custom))
-    if prompt_path_custom and os.path.exists(prompt_path_custom)
-    else None
-)
-env_default = Environment(loader=FileSystemLoader(prompt_path_default))
-@dataclass
-class Prompter:
-    """
-    A class for managing and rendering prompt templates.
-    Attributes:
-        prompt_template (str, optional): The name of the prompt template file.
-        prompt_variation (str, optional): The variation of the prompt template.
-        prompt_text (str, optional): The raw prompt text.
-        template (Union[str, Template], optional): The Jinja2 template object.
-    """
-    prompt_template: Optional[str] = None
-    prompt_variation: Optional[str] = "default"
-    prompt_text: Optional[str] = None
-    template: Optional[Union[str, Template]] = None
-    parser: Optional[Any] = None
-    def __init__(self, prompt_template=None, prompt_text=None, parser=None):
-        """
-        Initialize the Prompter with either a template file or raw text.
-        Args:
-            prompt_template (str, optional): The name of the prompt template file.
-            prompt_text (str, optional): The raw prompt text.
-        """
-        self.prompt_template = prompt_template
-        self.prompt_text = prompt_text
-        self.parser = parser
-        self.setup()
-    def setup(self):
-        """
-        Set up the Jinja2 template based on the provided template file or text.
-        Raises:
-            ValueError: If neither prompt_template nor prompt_text is provided.
-        """
-        if self.prompt_template:
-            # Primeiro tenta carregar da pasta personalizada, se disponível
-            if env_custom:
-                try:
-                    self.template = env_custom.get_template(
-                        f"{self.prompt_template}.jinja"
-                    )
-                    logger.debug(
-                        f"Template {self.prompt_template} carregado da pasta personalizada"
-                    )
-                    return
-                except Exception as e:
-                    logger.debug(
-                        f"Template {self.prompt_template} não encontrado na pasta personalizada: {e}"
-                    )
-            # Se não encontrou na personalizada ou não há pasta personalizada, tenta a padrão
-            try:
-                self.template = env_default.get_template(
-                    f"{self.prompt_template}.jinja"
-                )
-                logger.debug(
-                    f"Template {self.prompt_template} carregado da pasta padrão"
-                )
-            except Exception as e:
-                raise ValueError(
-                    f"Template {self.prompt_template} não encontrado na pasta padrão: {e}"
-                )
-        elif self.prompt_text:
-            self.template = Template(self.prompt_text)
-        else:
-            raise ValueError("Prompter must have a prompt_template or prompt_text")
-        assert self.prompt_template or self.prompt_text, "Prompt is required"
-    def to_langchain(self):
-        if isinstance(self.template, str):
-            template_text = self.template
-        else:
-            # For file-based templates, read the raw content
-            template_path = os.path.join("prompts", f"{self.prompt_template}.jinja")
-            with open(template_path, "r") as f:
-                template_text = f.read()
-        return ChatPromptTemplate.from_template(template_text, template_format="jinja2")
-    @classmethod
-    def from_text(cls, text: str):
-        """
-        Create a Prompter instance from raw text, which can contain Jinja code.
-        Args:
-            text (str): The raw prompt text.
-        Returns:
-            Prompter: A new Prompter instance.
-        """
-        return cls(prompt_text=text)
-    def render(self, data: Optional[Union[Dict, BaseModel]] = {}) -> str:
-        """
-        Render the prompt template with the given data.
-        Args:
-            data (Union[Dict, BaseModel]): The data to be used in rendering the template.
-                Can be either a dictionary or a Pydantic BaseModel.
-        Returns:
-            str: The rendered prompt text.
-        Raises:
-            AssertionError: If the template is not defined or not a Jinja2 Template.
-        """
-        # Convert Pydantic model to dict if necessary
-        data_dict = data.model_dump() if isinstance(data, BaseModel) else data
-        # Create a new mutable dictionary with the original data
-        render_data = dict(data_dict)
-        render_data["current_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        if self.parser:
-            render_data["format_instructions"] = self.parser.get_format_instructions()
-        assert self.template, "Prompter template is not defined"
-        assert isinstance(
-            self.template, Template
-        ), "Prompter template is not a Jinja2 Template"
-        return self.template.render(render_data)
-        return self.template.render(render_data)

{content_core-0.5.0.dist-info → content_core-0.5.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{content_core-0.5.0.dist-info → content_core-0.5.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{content_core-0.5.0.dist-info → content_core-0.5.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

content-core 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

content-core 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl