PyPI - MindsDB - Versions diffs - 25.2.4.0__py3-none-any.whl → 25.3.1.0__py3-none-any.whl - Mend

MindsDB 25.2.4.0__py3-none-any.whl → 25.3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (54) hide show

mindsdb/integrations/handlers/youtube_handler/youtube_handler.py CHANGED Viewed

@@ -10,9 +10,7 @@ from mindsdb.integrations.libs.response import (
 from mindsdb.utilities import log
 from mindsdb_sql_parser import parse_sql
-from collections import OrderedDict
 from mindsdb.utilities.config import Config
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
 from googleapiclient.discovery import build
@@ -38,12 +36,10 @@ class YoutubeHandler(APIHandler):
             name of a handler instance
         """
         super().__init__(name)
-        connection_data = kwargs.get("connection_data", {})
+        self.connection_data = kwargs.get("connection_data", {})
+        self.kwargs = kwargs
         self.parser = parse_sql
-        self.connection_data = connection_data
-        self.kwargs = kwargs
         self.connection = None
         self.is_connected = False
@@ -104,7 +100,6 @@ class YoutubeHandler(APIHandler):
             Status confirmation
         """
         response = StatusResponse(False)
-        need_to_close = self.is_connected is False
         try:
             self.connect()
@@ -131,34 +126,3 @@ class YoutubeHandler(APIHandler):
         """
         ast = parse_sql(query)
         return self.query(ast)
-connection_args = OrderedDict(
-    youtube_access_token={
-        "type": ARG_TYPE.STR,
-        "description": "API Key",
-        "label": "API Key",
-    },
-    credentials_url={
-        'type': ARG_TYPE.STR,
-        'description': 'URL to OAuth2 Credentials',
-        'label': 'URL to OAuth2 Credentials',
-    },
-    credentials_file={
-        'type': ARG_TYPE.STR,
-        'description': 'Location of OAuth2 Credentials',
-        'label': 'Location of OAuth2 Credentials',
-    },
-    credentials={
-        'type': ARG_TYPE.PATH,
-        'description': 'OAuth2 Credentials',
-        'label': 'Upload OAuth2 Credentials',
-    },
-    code={
-        'type': ARG_TYPE.STR,
-        'description': 'Authentication Code',
-        'label': 'Authentication Code',
-    }
-)
-connection_args_example = OrderedDict(youtube_api_token="<your-youtube-api-token>")

mindsdb/integrations/libs/llm/utils.py CHANGED Viewed

@@ -16,6 +16,7 @@ from mindsdb.integrations.libs.llm.config import (
     NvidiaNIMConfig,
     MindsdbConfig,
 )
+from mindsdb.utilities.config import config
 from langchain_text_splitters import Language, RecursiveCharacterTextSplitter
@@ -211,7 +212,7 @@ def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig:
     if provider == "mindsdb":
         return MindsdbConfig(
             model_name=args["model_name"],
-            project_name=args.get("project_name", "mindsdb"),
+            project_name=args.get("project_name", config.get("default_project")),
         )
     if provider == "vllm":
         return OpenAIConfig(

mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py CHANGED Viewed

@@ -1,9 +1,8 @@
-import os
 import json
+from pathlib import Path
 import requests
 import datetime as dt
 from flask import request
-from shutil import copyfile
 from mindsdb.utilities import log
@@ -29,73 +28,65 @@ class GoogleUserOAuth2Manager:
         creds = None
         if self.credentials_file or self.credentials_url:
-            # get the current directory and checks tokens & creds
-            curr_dir = self.handler_storage.folder_get('config')
+            oauth_user_info = self.handler_storage.encrypted_json_get('oauth_user_info')
-            creds_file = os.path.join(curr_dir, 'creds.json')
-            secret_file = os.path.join(curr_dir, 'secret.json')
-            if os.path.isfile(creds_file):
-                creds = Credentials.from_authorized_user_file(creds_file, self.scopes)
+            if oauth_user_info:
+                creds = Credentials.from_authorized_user_info(oauth_user_info, self.scopes)
             if not creds or not creds.valid:
                 logger.debug("Credentials do not exist or are invalid, attempting to authorize again")
-                if self._download_secret_file(secret_file):
-                    # save to storage
-                    self.handler_storage.folder_sync('config')
-                else:
-                    raise ValueError('No valid Gmail Credentials filepath or S3 url found.')
+                oauth_user_info = self._download_oauth_user_info()
                 if creds and creds.expired and creds.refresh_token:
                     creds.refresh(Request())
                     logger.debug("Credentials refreshed successfully")
                 else:
-                    creds = self._execute_google_auth_flow(secret_file, self.scopes, self.code)
+                    creds = self._execute_google_auth_flow(oauth_user_info)
                     logger.debug("New credentials obtained")
-                self._save_credentials_to_file(creds, creds_file)
-                logger.debug(f"saved session credentials to {creds_file}")
-                self.handler_storage.folder_sync('config')
+                self.handler_storage.encrypted_json_set('oauth_user_info', self._convert_credentials_to_dict(creds))
+                logger.debug("Saving credentials to storage")
         return creds
-    def _download_secret_file(self, secret_file):
-        # if credentials_url is set, attempt to download the file
+    def _download_oauth_user_info(self):
+        # if credentials_url is set, attempt to download the contents of the files
         # this will be given preference over credentials_file
         if self.credentials_url:
             response = requests.get(self.credentials_url)
             if response.status_code == 200:
-                with open(secret_file, 'w') as creds:
-                    creds.write(response.text)
-                return True
+                return response.json()
             else:
-                logger.error("Failed to get credentials from S3", response.status_code)
+                logger.error("Failed to get credentials from URL", response.status_code)
+        # if credentials_file is set, attempt to read the contents of the file
+        if self.credentials_file:
+            path = Path(self.credentials_file).expanduser()
+            if path.exists():
+                with open(path, 'r') as f:
+                    return json.load(f)
+            else:
+                logger.error("Credentials file does not exist")
-        # if credentials_file is set, attempt to copy the file
-        if self.credentials_file and os.path.isfile(self.credentials_file):
-            copyfile(self.credentials_file, secret_file)
-            return True
-        return False
+        raise ValueError('OAuth2 credentials could not be found')
-    def _execute_google_auth_flow(self, secret_file, scopes, code=None):
-        flow = Flow.from_client_secrets_file(secret_file, scopes)
+    def _execute_google_auth_flow(self, oauth_user_info: dict):
+        flow = Flow.from_client_config(
+            oauth_user_info,
+            scopes=self.scopes
+        )
         flow.redirect_uri = request.headers['ORIGIN'] + '/verify-auth'
-        if code:
-            flow.fetch_token(code=code)
+        if self.code:
+            flow.fetch_token(code=self.code)
             creds = flow.credentials
             return creds
         else:
             auth_url = flow.authorization_url()[0]
             raise AuthException(f'Authorisation required. Please follow the url: {auth_url}', auth_url=auth_url)
-    def _save_credentials_to_file(self, creds, file_path):
-        with open(file_path, 'w') as token:
-            data = self._convert_credentials_to_dict(creds)
-            token.write(json.dumps(data))
     def _convert_credentials_to_dict(self, credentials):
         return {
             'token': credentials.token,

mindsdb/integrations/utilities/pydantic_utils.py ADDED Viewed

@@ -0,0 +1,208 @@
+import pprint
+pydantic_schema_description = """## Understanding Pydantic Schemas for JSON Formatting
+Pydantic schemas provide a framework for defining the structure and validation rules for JSON output. Below is an overview of key components commonly found in a Pydantic schema:
+### Key Components
+Each object in the schema represents a Pydantic model in JSON format. Typical fields in a Pydantic model description include:
+- **`anyOf`**:
+  - A list describing possible values for a Pydantic model field.
+- **`additionalProperties`**:
+  - Describes the keys of a dictionary. Keys are always of type `string` due to this being a JSON Pydantic schema. The corresponding key types supported by Pydantic are:
+    - `string`: a text string
+    - `integer`: an integer number
+    - `number`: a floating-point number
+- **`items`**:
+  - Describes the items contained within an `array` (list).
+- **`type`**:
+  - Specifies the Pydantic type assigned to the field, defining the expected data type. Common types include:
+    - `string`: a text string
+    - `integer`: an integer number
+    - `number`: a floating-point number
+    - `array`: a list
+    - `object`: a dictionary
+    - `null`: the python null value None. Indicates the field is optional.
+- **`description`**:
+  - Provides a textual narrative explaining the purpose and details of the output JSON field.
+- **`title`**:
+  - A Pydantic-generated, human-readable title for the field.
+- **`default`**:
+  - The default value for this field if no value is provided by the user.
+### Schema
+Below is the Pydantic schema:
+{schema}
+### Examples
+Below is an example of well-formed output adhering to this schema.
+- Dummy text strings are represented as "lorem ipsum."
+{example}
+"""
+def get_dummy_value(field_value):
+    """A function to return a dummy value of a Pydantic model field."""
+    type_str = field_value["type"]
+    example_dict = {
+        "string": "lorem ipsum",
+        "int": 3,
+        "number": 42.0,
+        "null": None,
+        "object": {"lorem ipsum": "lorem_ipsum"},
+    }
+    if type_str in example_dict:
+        return example_dict[type_str]
+    else:
+        return None
+def get_dummy_array(field_value):
+    """A function to return a dummy array of a Pydantic model field."""
+    items = field_value["items"]
+    if "type" in items:
+        if items["type"] == "null":  # skip if null
+            pass
+        elif items["type"] == "array":  # is it an array?
+            array_value = get_dummy_array(items)
+        elif (
+            items["type"] == "object" and "additionalProperties" in items
+        ):  # is it a dict?
+            array_value = get_dummy_dict(items)
+        else:  # it is a regular value!
+            array_value = get_dummy_value(items)
+        return [array_value for _ in range(2)]
+    elif "AnyOf" in field_value["items"]:
+        array_value = get_any_of(field_value["items"])  # can be one of many types
+        return [array_value for _ in range(2)]
+    else:  # is it a pydantic class?
+        array_value = example_generator(items)
+        return [array_value for _ in range(2)]
+def get_dummy_dict(field_value):
+    """A function to return a dummy dictionary of a Pydantic model field."""
+    return get_dummy_value(field_value)
+def get_any_of(field_value):
+    """A function to return the first viable pydantic type of an Any() Pydantic model field."""
+    for any_of in field_value["anyOf"]:
+        if "type" in any_of:
+            if any_of["type"] == "null":  # skip if null
+                continue
+            elif any_of["type"] == "array":  # is it an array?
+                out = get_dummy_array(any_of)
+                return out
+            elif (
+                any_of["type"] == "object" and "additionalProperties" in any_of
+            ):  # is it a dict?
+                out = get_dummy_dict(any_of)
+                return out
+            else:  # it is a regular value!
+                out = get_dummy_value(any_of)
+                return out
+        else:  # is it a pydantic class?
+            out = example_generator(any_of)
+            return out
+def example_generator(pydantic_json_schema):
+    """dynamically parse a pydantic object and generate an example of it's formatting."""
+    example_dict = {}
+    for schema_name, schema in pydantic_json_schema.items():
+        for field_name, field_value in schema.items():
+            if "type" in field_value:
+                if field_value["type"] == "array":  # is it an array?
+                    example_dict[field_name] = get_dummy_array(field_value)
+                elif (
+                    field_value["type"] == "object"
+                    and "additionalProperties" in field_value
+                ):  # is it a dict?
+                    example_dict[field_name] = get_dummy_dict(field_value)
+                else:  # it is a regular value!
+                    example_dict[field_name] = get_dummy_value(field_value)
+            elif "anyOf" in field_value:
+                example_dict[field_name] = get_any_of(field_value)
+            else:  # it is a pydantic class
+                example_dict[field_name] = example_generator(field_value)
+    return example_dict
+def search_and_replace_refs(schema, defs, ref_skip={}, n=0):
+    """Dynamically substitute subclass references in a Pydantic object schema."""
+    for key, value in schema.items():
+        if key in ref_skip:
+            continue
+        if type(value) is dict:
+            if "$ref" in value:
+                definition_key = value["$ref"].split("/")[-1]
+                if definition_key in ref_skip:
+                    schema[key] = {"type": "null"}
+                else:
+                    schema[key] = {definition_key: defs[definition_key]["properties"]}
+            else:
+                search_and_replace_refs(value, defs, ref_skip, n + 1)
+        elif type(value) is list:
+            for val in value:
+                search_and_replace_refs(val, defs, ref_skip, n + 1)
+def remove_extraneous_fields(schema, ref_skip):
+    """Remove extraneous fields from object descriptions."""
+    reduced_schema = schema["properties"]
+    for ref in ref_skip.keys():
+        if ref in reduced_schema:
+            del reduced_schema[ref]
+    for key, value in reduced_schema.items():
+        if "title" in value:
+            del value["title"]
+        if "$defs" in value:
+            del value["$defs"]
+        if "required" in value:
+            del value["required"]
+    return reduced_schema
+def format_for_prompt(pydantic_object, ref_skip={}):
+    """Format a Pydantic object description for prompting an LLM."""
+    schema = {k: v for k, v in pydantic_object.schema().items()}
+    search_and_replace_refs(
+        schema=schema["properties"], defs=schema["$defs"], ref_skip=ref_skip, n=0
+    )
+    reduced_schema = remove_extraneous_fields(schema, ref_skip)
+    reduced_schema = {schema["title"]: reduced_schema}
+    out = pprint.pformat(reduced_schema)
+    return out, reduced_schema

mindsdb/integrations/utilities/rag/pipelines/rag.py CHANGED Viewed

@@ -294,16 +294,23 @@ class LangChainRAGPipeline:
         retriever = SQLRetriever(
             fallback_retriever=vector_store_retriever,
             vector_store_handler=knowledge_base_table.get_vector_db(),
-            metadata_schemas=retriever_config.metadata_schemas,
-            examples=retriever_config.examples,
+            min_k=retriever_config.min_k,
+            max_filters=retriever_config.max_filters,
+            filter_threshold=retriever_config.filter_threshold,
+            database_schema=retriever_config.database_schema,
             embeddings_model=embeddings,
+            search_kwargs=config.search_kwargs,
             rewrite_prompt_template=retriever_config.rewrite_prompt_template,
-            metadata_filters_prompt_template=retriever_config.metadata_filters_prompt_template,
+            table_prompt_template=retriever_config.table_prompt_template,
+            column_prompt_template=retriever_config.column_prompt_template,
+            value_prompt_template=retriever_config.value_prompt_template,
+            boolean_system_prompt=retriever_config.boolean_system_prompt,
+            generative_system_prompt=retriever_config.generative_system_prompt,
             num_retries=retriever_config.num_retries,
             embeddings_table=knowledge_base_table._kb.vector_database_table,
             source_table=retriever_config.source_table,
+            source_id_column=retriever_config.source_id_column,
             distance_function=distance_function,
-            search_kwargs=config.search_kwargs,
             llm=sql_llm
         )
         return cls(

MindsDB 25.2.4.0__py3-none-any.whl → 25.3.1.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.2.4.0__py3-none-any.whl → 25.3.1.0__py3-none-any.whl