MindsDB 25.7.2.0__py3-none-any.whl → 25.7.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic; consult the registry's release advisory for more details.

@@ -7,7 +7,7 @@ from mindsdb_sql_parser import ast
7
7
  from mindsdb.integrations.utilities.handlers.query_utilities import (
8
8
  SELECTQueryParser,
9
9
  SELECTQueryExecutor,
10
- INSERTQueryParser
10
+ INSERTQueryParser,
11
11
  )
12
12
 
13
13
  import pandas as pd
@@ -66,9 +66,13 @@ class YoutubeCommentsTable(APITable):
66
66
  select_statement_executor = SELECTQueryExecutor(
67
67
  comments_df,
68
68
  selected_columns,
69
- [where_condition for where_condition in where_conditions if where_condition[1] not in ['video_id', 'channel_id']],
69
+ [
70
+ where_condition
71
+ for where_condition in where_conditions
72
+ if where_condition[1] not in ["video_id", "channel_id"]
73
+ ],
70
74
  order_by_conditions,
71
- result_limit if query.limit else None
75
+ result_limit if query.limit else None,
72
76
  )
73
77
 
74
78
  comments_df = select_statement_executor.execute_query()
@@ -98,50 +102,30 @@ class YoutubeCommentsTable(APITable):
98
102
  values_to_insert = insert_query_parser.parse_query()
99
103
 
100
104
  for value in values_to_insert:
101
- if not value.get('comment_id'):
102
- if not value.get('comment'):
105
+ if not value.get("comment_id"):
106
+ if not value.get("comment"):
103
107
  raise ValueError("comment is mandatory for inserting a top-level comment.")
104
108
  else:
105
- self.insert_comment(video_id=value['video_id'], text=value['comment'])
109
+ self.insert_comment(video_id=value["video_id"], text=value["comment"])
106
110
 
107
111
  else:
108
- if not value.get('reply'):
112
+ if not value.get("reply"):
109
113
  raise ValueError("reply is mandatory for inserting a reply.")
110
114
  else:
111
- self.insert_comment(comment_id=value['comment_id'], text=value['reply'])
115
+ self.insert_comment(comment_id=value["comment_id"], text=value["reply"])
112
116
 
113
117
  def insert_comment(self, text, video_id: str = None, comment_id: str = None):
114
118
  # if comment_id is provided, define the request body for a reply and insert it
115
119
  if comment_id:
116
- request_body = {
117
- 'snippet': {
118
- 'parentId': comment_id,
119
- 'textOriginal': text
120
- }
121
- }
120
+ request_body = {"snippet": {"parentId": comment_id, "textOriginal": text}}
122
121
 
123
- self.handler.connect().comments().insert(
124
- part='snippet',
125
- body=request_body
126
- ).execute()
122
+ self.handler.connect().comments().insert(part="snippet", body=request_body).execute()
127
123
 
128
124
  # else if video_id is provided, define the request body for a top-level comment and insert it
129
125
  elif video_id:
130
- request_body = {
131
- 'snippet': {
132
- 'topLevelComment': {
133
- 'snippet': {
134
- 'videoId': video_id,
135
- 'textOriginal': text
136
- }
137
- }
138
- }
139
- }
126
+ request_body = {"snippet": {"topLevelComment": {"snippet": {"videoId": video_id, "textOriginal": text}}}}
140
127
 
141
- self.handler.connect().commentThreads().insert(
142
- part='snippet',
143
- body=request_body
144
- ).execute()
128
+ self.handler.connect().commentThreads().insert(part="snippet", body=request_body).execute()
145
129
 
146
130
  def get_columns(self) -> List[str]:
147
131
  """Gets all columns to be returned in pandas DataFrame responses
@@ -150,7 +134,19 @@ class YoutubeCommentsTable(APITable):
150
134
  List[str]
151
135
  List of columns
152
136
  """
153
- return ['comment_id', 'channel_id', 'video_id', 'user_id', 'display_name', 'comment', "published_at", "updated_at", 'reply_user_id', 'reply_author', 'reply']
137
+ return [
138
+ "comment_id",
139
+ "channel_id",
140
+ "video_id",
141
+ "user_id",
142
+ "display_name",
143
+ "comment",
144
+ "published_at",
145
+ "updated_at",
146
+ "reply_user_id",
147
+ "reply_author",
148
+ "reply",
149
+ ]
154
150
 
155
151
  def get_comments(self, video_id: str, channel_id: str):
156
152
  """Pulls all the records from the given youtube api end point and returns it select()
@@ -166,7 +162,12 @@ class YoutubeCommentsTable(APITable):
166
162
  resource = (
167
163
  self.handler.connect()
168
164
  .commentThreads()
169
- .list(part="snippet, replies", videoId=video_id, allThreadsRelatedToChannelId=channel_id, textFormat="plainText")
165
+ .list(
166
+ part="snippet, replies",
167
+ videoId=video_id,
168
+ allThreadsRelatedToChannelId=channel_id,
169
+ textFormat="plainText",
170
+ )
170
171
  )
171
172
 
172
173
  data = []
@@ -175,7 +176,7 @@ class YoutubeCommentsTable(APITable):
175
176
 
176
177
  for comment in comments["items"]:
177
178
  replies = []
178
- if 'replies' in comment:
179
+ if "replies" in comment:
179
180
  for reply in comment["replies"]["comments"]:
180
181
  replies.append(
181
182
  {
@@ -222,18 +223,51 @@ class YoutubeCommentsTable(APITable):
222
223
  else:
223
224
  break
224
225
 
225
- youtube_comments_df = pd.json_normalize(data, 'replies', ['comment_id', 'channel_id', 'video_id', 'user_id', 'display_name', 'comment', "published_at", "updated_at"], record_prefix='replies.')
226
- youtube_comments_df = youtube_comments_df.rename(columns={'replies.user_id': 'reply_user_id', 'replies.reply_author': 'reply_author', 'replies.reply': 'reply'})
226
+ youtube_comments_df = pd.json_normalize(
227
+ data,
228
+ "replies",
229
+ [
230
+ "comment_id",
231
+ "channel_id",
232
+ "video_id",
233
+ "user_id",
234
+ "display_name",
235
+ "comment",
236
+ "published_at",
237
+ "updated_at",
238
+ ],
239
+ record_prefix="replies.",
240
+ )
241
+ youtube_comments_df = youtube_comments_df.rename(
242
+ columns={
243
+ "replies.user_id": "reply_user_id",
244
+ "replies.reply_author": "reply_author",
245
+ "replies.reply": "reply",
246
+ }
247
+ )
227
248
 
228
249
  # check if DataFrame is empty
229
250
  if youtube_comments_df.empty:
230
251
  return youtube_comments_df
231
252
  else:
232
- return youtube_comments_df[['comment_id', 'channel_id', 'video_id', 'user_id', 'display_name', 'comment', "published_at", "updated_at", 'reply_user_id', 'reply_author', 'reply']]
253
+ return youtube_comments_df[
254
+ [
255
+ "comment_id",
256
+ "channel_id",
257
+ "video_id",
258
+ "user_id",
259
+ "display_name",
260
+ "comment",
261
+ "published_at",
262
+ "updated_at",
263
+ "reply_user_id",
264
+ "reply_author",
265
+ "reply",
266
+ ]
267
+ ]
233
268
 
234
269
 
235
270
  class YoutubeChannelsTable(APITable):
236
-
237
271
  """Youtube Channel Info by channel id Table implementation"""
238
272
 
239
273
  def select(self, query: ast.Select) -> pd.DataFrame:
@@ -263,9 +297,9 @@ class YoutubeChannelsTable(APITable):
263
297
  select_statement_executor = SELECTQueryExecutor(
264
298
  channel_df,
265
299
  selected_columns,
266
- [where_condition for where_condition in where_conditions if where_condition[1] == 'channel_id'],
300
+ [where_condition for where_condition in where_conditions if where_condition[1] == "channel_id"],
267
301
  order_by_conditions,
268
- result_limit if query.limit else None
302
+ result_limit if query.limit else None,
269
303
  )
270
304
 
271
305
  channel_df = select_statement_executor.execute_query()
@@ -304,7 +338,6 @@ class YoutubeChannelsTable(APITable):
304
338
 
305
339
 
306
340
  class YoutubeVideosTable(APITable):
307
-
308
341
  """Youtube Video info by video id Table implementation"""
309
342
 
310
343
  def select(self, query: ast.Select) -> pd.DataFrame:
@@ -317,7 +350,7 @@ class YoutubeVideosTable(APITable):
317
350
  result_limit,
318
351
  ) = select_statement_parser.parse_query()
319
352
 
320
- video_id, channel_id = None, None
353
+ video_id, channel_id, search_query = None, None, None
321
354
  for op, arg1, arg2 in where_conditions:
322
355
  if arg1 == "video_id":
323
356
  if op == "=":
@@ -331,38 +364,126 @@ class YoutubeVideosTable(APITable):
331
364
  else:
332
365
  raise NotImplementedError("Only '=' operator is supported for channel_id column.")
333
366
 
334
- if not video_id and not channel_id:
335
- raise ValueError("Either video_id or channel_id has to be present in where clause.")
367
+ elif arg1 == "query":
368
+ if op == "=":
369
+ search_query = arg2
370
+ else:
371
+ raise NotImplementedError("Only '=' operator is supported for query column.")
372
+
373
+ if not video_id and not channel_id and not search_query:
374
+ raise ValueError("At least one of video_id, channel_id, or query must be present in the WHERE clause.")
336
375
 
337
376
  if video_id:
338
377
  video_df = self.get_videos_by_video_ids([video_id])
378
+ elif channel_id and search_query:
379
+ video_df = self.get_videos_by_search_query_in_channel(search_query, channel_id, result_limit)
380
+ elif channel_id:
381
+ video_df = self.get_videos_by_channel_id(channel_id, result_limit)
339
382
  else:
340
- video_df = self.get_videos_by_channel_id(channel_id)
383
+ video_df = self.get_videos_by_search_query(search_query, result_limit)
341
384
 
342
385
  select_statement_executor = SELECTQueryExecutor(
343
386
  video_df,
344
387
  selected_columns,
345
- [where_condition for where_condition in where_conditions if where_condition[1] not in ['video_id', 'channel_id']],
388
+ [
389
+ where_condition
390
+ for where_condition in where_conditions
391
+ if where_condition[1] not in ["video_id", "channel_id", "query"]
392
+ ],
346
393
  order_by_conditions,
347
- result_limit if query.limit else None
394
+ result_limit if query.limit else None,
348
395
  )
349
396
 
350
397
  video_df = select_statement_executor.execute_query()
351
398
 
352
399
  return video_df
353
400
 
354
- def get_videos_by_channel_id(self, channel_id):
401
+ def get_videos_by_search_query(self, search_query, limit=10):
355
402
  video_ids = []
356
403
  resource = (
357
404
  self.handler.connect()
358
405
  .search()
359
- .list(part="snippet", channelId=channel_id, type="video")
406
+ .list(part="snippet", q=search_query, type="video", maxResults=min(50, limit))
360
407
  )
361
- while resource:
408
+ total_fetched = 0
409
+
410
+ while resource and total_fetched < limit:
411
+ response = resource.execute()
412
+ for item in response["items"]:
413
+ video_ids.append(item["id"]["videoId"])
414
+ total_fetched += 1
415
+ if total_fetched >= limit:
416
+ break
417
+
418
+ if "nextPageToken" in response and total_fetched < limit:
419
+ resource = (
420
+ self.handler.connect()
421
+ .search()
422
+ .list(
423
+ part="snippet",
424
+ q=search_query,
425
+ type="video",
426
+ maxResults=min(50, limit - total_fetched),
427
+ pageToken=response["nextPageToken"],
428
+ )
429
+ )
430
+ else:
431
+ break
432
+
433
+ return self.get_videos_by_video_ids(video_ids)
434
+
435
+ def get_videos_by_search_query_in_channel(self, search_query, channel_id, limit=10):
436
+ """Search for videos within a specific channel"""
437
+ video_ids = []
438
+ resource = (
439
+ self.handler.connect()
440
+ .search()
441
+ .list(part="snippet", q=search_query, channelId=channel_id, type="video", maxResults=min(50, limit))
442
+ )
443
+ total_fetched = 0
444
+
445
+ while resource and total_fetched < limit:
446
+ response = resource.execute()
447
+ for item in response["items"]:
448
+ video_ids.append(item["id"]["videoId"])
449
+ total_fetched += 1
450
+ if total_fetched >= limit:
451
+ break
452
+
453
+ if "nextPageToken" in response and total_fetched < limit:
454
+ resource = (
455
+ self.handler.connect()
456
+ .search()
457
+ .list(
458
+ part="snippet",
459
+ q=search_query,
460
+ channelId=channel_id,
461
+ type="video",
462
+ maxResults=min(50, limit - total_fetched),
463
+ pageToken=response["nextPageToken"],
464
+ )
465
+ )
466
+ else:
467
+ break
468
+
469
+ return self.get_videos_by_video_ids(video_ids)
470
+
471
+ def get_videos_by_channel_id(self, channel_id, limit=10):
472
+ video_ids = []
473
+ resource = (
474
+ self.handler.connect()
475
+ .search()
476
+ .list(part="snippet", channelId=channel_id, type="video", maxResults=min(50, limit))
477
+ )
478
+ total_fetched = 0
479
+ while resource and total_fetched < limit:
362
480
  response = resource.execute()
363
481
  for item in response["items"]:
364
482
  video_ids.append(item["id"]["videoId"])
365
- if "nextPageToken" in response:
483
+ total_fetched += 1
484
+ if total_fetched >= limit:
485
+ break
486
+ if "nextPageToken" in response and total_fetched < limit:
366
487
  resource = (
367
488
  self.handler.connect()
368
489
  .search()
@@ -370,6 +491,7 @@ class YoutubeVideosTable(APITable):
370
491
  part="snippet",
371
492
  channelId=channel_id,
372
493
  type="video",
494
+ maxResults=min(50, limit - total_fetched),
373
495
  pageToken=response["nextPageToken"],
374
496
  )
375
497
  )
@@ -388,7 +510,13 @@ class YoutubeVideosTable(APITable):
388
510
  # loop over 50 video ids at a time
389
511
  # an invalid request error is caused otherwise
390
512
  for i in range(0, len(video_ids), 50):
391
- resource = self.handler.connect().videos().list(part="statistics,snippet,contentDetails", id=",".join(video_ids[i:i + 50])).execute()
513
+ resource = (
514
+ self.handler.connect()
515
+ .videos()
516
+ .list(part="statistics,snippet,contentDetails", id=",".join(video_ids[i : i + 50]))
517
+ .execute()
518
+ )
519
+
392
520
  for item in resource["items"]:
393
521
  data.append(
394
522
  {
@@ -415,7 +543,7 @@ class YoutubeVideosTable(APITable):
415
543
  return json_formatted_transcript
416
544
 
417
545
  except Exception as e:
418
- logger.error(f"Encountered an error while fetching transcripts for video ${video_id}: ${e}"),
546
+ (logger.error(f"Encountered an error while fetching transcripts for video ${video_id}: ${e}"),)
419
547
  return "Transcript not available for this video"
420
548
 
421
549
  def parse_duration(self, video_id, duration):
@@ -428,7 +556,7 @@ class YoutubeVideosTable(APITable):
428
556
 
429
557
  return duration_str.strip(":")
430
558
  except Exception as e:
431
- logger.error(f"Encountered an error while parsing duration for video ${video_id}: ${e}"),
559
+ (logger.error(f"Encountered an error while parsing duration for video ${video_id}: ${e}"),)
432
560
  return "Duration not available for this video"
433
561
 
434
562
  def get_columns(self) -> List[str]:
@@ -180,7 +180,7 @@ class AgentsController:
180
180
  agent (db.Agents): The created agent
181
181
 
182
182
  Raises:
183
- ValueError: Agent with given name already exists, or skill/model with given name does not exist.
183
+ EntityExistsError: Agent with given name already exists, or skill/model with given name does not exist.
184
184
  """
185
185
  if project_name is None:
186
186
  project_name = default_project
@@ -189,7 +189,7 @@ class AgentsController:
189
189
  agent = self.get_agent(name, project_name)
190
190
 
191
191
  if agent is not None:
192
- raise ValueError(f"Agent with name already exists: {name}")
192
+ raise EntityExistsError("Agent already exists", name)
193
193
 
194
194
  # No need to copy params since we're not preserving the original reference
195
195
  params = params or {}
@@ -1,7 +1,7 @@
1
1
  from typing import List, Union
2
-
3
2
  import pandas as pd
4
-
3
+ import json
4
+ import datetime
5
5
  from mindsdb.integrations.libs.response import RESPONSE_TYPE
6
6
  from mindsdb.interfaces.data_catalog.base_data_catalog import BaseDataCatalog
7
7
  from mindsdb.interfaces.storage import db
@@ -204,6 +204,8 @@ class DataCatalogLoader(BaseDataCatalog):
204
204
  # Convert the distinct_values_count to an integer if it is not NaN, otherwise set it to None.
205
205
  val = row.get("distinct_values_count")
206
206
  distinct_values_count = int(val) if pd.notna(val) else None
207
+ min_val = row.get("minimum_value")
208
+ max_val = row.get("maximum_value")
207
209
 
208
210
  # Convert the most_common_frequencies to a list of strings.
209
211
  most_common_frequencies = [str(val) for val in row.get("most_common_frequencies") or []]
@@ -214,8 +216,8 @@ class DataCatalogLoader(BaseDataCatalog):
214
216
  most_common_frequencies=most_common_frequencies,
215
217
  null_percentage=row.get("null_percentage"),
216
218
  distinct_values_count=distinct_values_count,
217
- minimum_value=row.get("minimum_value"),
218
- maximum_value=row.get("maximum_value"),
219
+ minimum_value=self.to_str(min_val),
220
+ maximum_value=self.to_str(max_val),
219
221
  )
220
222
  column_statistics.append(record)
221
223
 
@@ -373,3 +375,15 @@ class DataCatalogLoader(BaseDataCatalog):
373
375
  db.session.delete(table)
374
376
  db.session.commit()
375
377
  self.logger.info(f"Metadata for {self.database_name} removed successfully.")
378
+
379
+ def to_str(self, val) -> str:
380
+ """
381
+ Convert a value to a string.
382
+ """
383
+ if val is None:
384
+ return None
385
+ if isinstance(val, (datetime.datetime, datetime.date)):
386
+ return val.isoformat()
387
+ if isinstance(val, (list, dict, set, tuple)):
388
+ return json.dumps(val, default=str)
389
+ return str(val)
@@ -1186,6 +1186,13 @@ class KnowledgeBaseController:
1186
1186
  if "provider" not in params:
1187
1187
  raise ValueError("'provider' parameter is required for embedding model")
1188
1188
 
1189
+ # check available providers
1190
+ avail_providers = ("openai", "azure_openai", "bedrock", "gemini", "google")
1191
+ if params["provider"] not in avail_providers:
1192
+ raise ValueError(
1193
+ f"Wrong embedding provider: {params['provider']}. Available providers: {', '.join(avail_providers)}"
1194
+ )
1195
+
1189
1196
  if params["provider"] not in ("openai", "azure_openai"):
1190
1197
  # try use litellm
1191
1198
  try:
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import math
3
+ import re
3
4
  import time
4
5
  from typing import List
5
6
 
@@ -16,15 +17,15 @@ logger = log.getLogger(__name__)
16
17
 
17
18
 
18
19
  GENERATE_QA_SYSTEM_PROMPT = """
19
- Your task is to generate question and answer pairs for a search engine.
20
+ Your task is to generate question and answer pairs for a search engine.
20
21
  The search engine will take your query and return a list of documents.
21
22
  You will be given a text and you need to generate a question that can be answered using the information in the text.
22
23
  Your questions will be used to evaluate the search engine.
23
- Question should always have enough clues to identify the specific text that this question is generated from.
24
+ Question should always have enough clues to identify the specific text that this question is generated from.
24
25
  Never ask questions like "What license number is associated with Amend 6" because Amend 6 could be found in many documents and the question is not specific enough.
25
- Example output 1: {\"query\": \"What processor does the HP 2023 14\" FHD IPS Laptop use?\", \"reference_answer\": \"Ryzen 3 5300U\"}
26
+ Example output 1: {\"query\": \"What processor does the HP 2023 14\" FHD IPS Laptop use?\", \"reference_answer\": \"Ryzen 3 5300U\"}
26
27
  Example output 2: {\"query\": \"What is the name of the river in Paris?\", \"reference_answer\": \"Seine\"}
27
- Don't generate questions like "What is being amended in the application?" because these questions cannot be answered using the text and without knowing which document it refers to.
28
+ Don't generate questions like "What is being amended in the application?" because these questions cannot be answered using the text and without knowing which document it refers to.
28
29
  The question should be answerable without the text, but the answer should be present in the text.
29
30
  Return ONLY a json response. No other text.
30
31
  """
@@ -43,6 +44,39 @@ def calc_entropy(values: List[float]) -> float:
43
44
  return -sum([pk * math.log(pk) for pk in values])
44
45
 
45
46
 
47
+ def sanitize_json_response(response: str) -> str:
48
+ """Remove markdown code block formatting from JSON response and extract valid JSON."""
49
+ if not response or not response.strip():
50
+ raise ValueError("Empty response provided.")
51
+
52
+ # Remove leading/trailing whitespace
53
+ response = response.strip()
54
+
55
+ # Remove markdown code block markers if present
56
+ response = re.sub(r"^```(?:json|JSON)?\s*", "", response, flags=re.MULTILINE)
57
+ response = re.sub(r"\s*```$", "", response, flags=re.MULTILINE)
58
+ response = response.strip()
59
+
60
+ # Find the first opening brace
61
+ start_idx = response.find("{")
62
+ if start_idx == -1:
63
+ raise ValueError("No JSON object found in the response.")
64
+
65
+ # Try to parse JSON starting from first { with increasing end positions
66
+ # This handles nested objects and strings with braces correctly
67
+ for end_idx in range(len(response), start_idx, -1): # Start from end and work backwards
68
+ candidate = response[start_idx:end_idx]
69
+ try:
70
+ parsed = json.loads(candidate)
71
+ # Ensure it's a dictionary (object) not just any valid JSON
72
+ if isinstance(parsed, dict):
73
+ return candidate
74
+ except json.JSONDecodeError:
75
+ continue
76
+
77
+ raise ValueError("No valid JSON object found in the response.")
78
+
79
+
46
80
  class EvaluateBase:
47
81
  DEFAULT_QUESTION_COUNT = 20
48
82
  DEFAULT_SAMPLE_SIZE = 10000
@@ -178,6 +212,7 @@ class EvaluateBase:
178
212
  test_data = self.read_from_table(test_table)
179
213
 
180
214
  scores = self.evaluate(test_data)
215
+ scores["id"] = math.floor(time.time()) # unique ID for the evaluation run
181
216
  scores["name"] = self.name
182
217
  scores["created_at"] = dt.datetime.now()
183
218
 
@@ -237,9 +272,13 @@ class EvaluateRerank(EvaluateBase):
237
272
  {"role": "system", "content": GENERATE_QA_SYSTEM_PROMPT},
238
273
  {"role": "user", "content": f"\n\nText:\n{text}\n\n"},
239
274
  ]
240
- answer = self.llm_client.completion(messages)
275
+ answer = self.llm_client.completion(messages, json_output=True)
276
+
277
+ # Sanitize the response by removing markdown code block formatting like ```json
278
+ sanitized_answer = sanitize_json_response(answer)
279
+
241
280
  try:
242
- output = json.loads(answer)
281
+ output = json.loads(sanitized_answer)
243
282
  except json.JSONDecodeError:
244
283
  raise ValueError(f"Could not parse response from LLM: {answer}")
245
284
 
@@ -448,9 +487,13 @@ class EvaluateDocID(EvaluateBase):
448
487
  {"role": "system", "content": GENERATE_QA_SYSTEM_PROMPT},
449
488
  {"role": "user", "content": f"\n\nText:\n{text}\n\n"},
450
489
  ]
451
- answer = self.llm_client.completion(messages)
490
+ answer = self.llm_client.completion(messages, json_output=True)
491
+
492
+ # Sanitize the response by removing markdown code block formatting like ```json
493
+ sanitized_answer = sanitize_json_response(answer)
494
+
452
495
  try:
453
- output = json.loads(answer)
496
+ output = json.loads(sanitized_answer)
454
497
  except json.JSONDecodeError:
455
498
  raise ValueError(f"Could not parse response from LLM: {answer}")
456
499
 
@@ -54,12 +54,12 @@ class LLMClient:
54
54
 
55
55
  self.client = module.Handler
56
56
 
57
- def completion(self, messages: List[dict]) -> str:
57
+ def completion(self, messages: List[dict], json_output: bool = False) -> str:
58
58
  """
59
59
  Call LLM completion and get response
60
60
  """
61
61
  params = self.params
62
-
62
+ params["json_output"] = json_output
63
63
  if self.provider in ("azure_openai", "openai"):
64
64
  response = self.client.chat.completions.create(
65
65
  model=params["model_name"],
@@ -69,6 +69,6 @@ class LLMClient:
69
69
  else:
70
70
  kwargs = params.copy()
71
71
  model = kwargs.pop("model_name")
72
-
72
+ kwargs.pop("provider", None)
73
73
  response = self.client.completion(self.provider, model=model, messages=messages, args=kwargs)
74
74
  return response.choices[0].message.content
@@ -1,16 +1,17 @@
1
+ import re
2
+ import html
3
+ import asyncio
1
4
  from typing import List, Dict, Optional, Any
5
+
2
6
  import pandas as pd
3
7
  from langchain_text_splitters import RecursiveCharacterTextSplitter
4
- import asyncio
5
-
8
+ from langchain_core.documents import Document as LangchainDocument
6
9
 
7
10
  from mindsdb.integrations.utilities.rag.splitters.file_splitter import (
8
11
  FileSplitter,
9
12
  FileSplitterConfig,
10
13
  )
11
-
12
14
  from mindsdb.interfaces.agents.langchain_agent import create_chat_model
13
-
14
15
  from mindsdb.interfaces.knowledge_base.preprocessing.models import (
15
16
  PreprocessingConfig,
16
17
  ProcessedChunk,
@@ -21,7 +22,6 @@ from mindsdb.interfaces.knowledge_base.preprocessing.models import (
21
22
  )
22
23
  from mindsdb.utilities import log
23
24
 
24
- from langchain_core.documents import Document as LangchainDocument
25
25
 
26
26
  logger = log.getLogger(__name__)
27
27
 
@@ -123,11 +123,11 @@ class ContextualPreprocessor(DocumentPreprocessor):
123
123
 
124
124
  DEFAULT_CONTEXT_TEMPLATE = """
125
125
  <document>
126
- {{WHOLE_DOCUMENT}}
126
+ {WHOLE_DOCUMENT}
127
127
  </document>
128
128
  Here is the chunk we want to situate within the whole document
129
129
  <chunk>
130
- {{CHUNK_CONTENT}}
130
+ {CHUNK_CONTENT}
131
131
  </chunk>
132
132
  Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else."""
133
133
 
@@ -149,12 +149,20 @@ Please give a short succinct context to situate this chunk within the overall do
149
149
  self.summarize = self.config.summarize
150
150
 
151
151
  def _prepare_prompts(self, chunk_contents: list[str], full_documents: list[str]) -> list[str]:
152
- prompts = [
153
- self.context_template.replace("{{WHOLE_DOCUMENT}}", full_document) for full_document in full_documents
154
- ]
155
- prompts = [
156
- prompt.replace("{{CHUNK_CONTENT}}", chunk_content) for prompt, chunk_content in zip(prompts, chunk_contents)
157
- ]
152
+ def tag_replacer(match):
153
+ tag = match.group(0)
154
+ if tag.lower() not in ["<document>", "</document>", "<chunk>", "</chunk>"]:
155
+ return tag
156
+ return html.escape(tag)
157
+
158
+ tag_pattern = r"</?document>|</?chunk>"
159
+ prompts = []
160
+ for chunk_content, full_document in zip(chunk_contents, full_documents):
161
+ chunk_content = re.sub(tag_pattern, tag_replacer, chunk_content, flags=re.IGNORECASE)
162
+ full_document = re.sub(tag_pattern, tag_replacer, full_document, flags=re.IGNORECASE)
163
+ prompts.append(
164
+ self.DEFAULT_CONTEXT_TEMPLATE.format(WHOLE_DOCUMENT=full_document, CHUNK_CONTENT=chunk_content)
165
+ )
158
166
 
159
167
  return prompts
160
168