MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
Potentially problematic release: this version of MindsDB might be problematic.
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
- mindsdb/integrations/utilities/rag/settings.py +8 -2
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +3 -5
- mindsdb/interfaces/agents/langchain_agent.py +112 -150
- mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/projects.py +17 -15
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +39 -15
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/interfaces/skills/skill_tool.py +97 -53
- mindsdb/interfaces/skills/sql_agent.py +77 -36
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +11 -1
- mindsdb/utilities/langfuse.py +264 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/sql_query/result_set.py
@@ -1,5 +1,6 @@
-from typing import List
 import copy
+from typing import List, Optional
+
 import numpy as np
 import pandas as pd
 
@@ -35,6 +36,19 @@ class Column:
         return f'{self.__class__.__name__}({self.__dict__})'
 
 
+def rename_df_columns(df: pd.DataFrame, names: Optional[List] = None) -> None:
+    """Inplace rename of dataframe columns
+
+    Args:
+        df (pd.DataFrame): dataframe
+        names (Optional[List]): columns names to set
+    """
+    if names is not None:
+        df.columns = names
+    else:
+        df.columns = list(range(len(df.columns)))
+
+
 class ResultSet:
     def __init__(self, columns=None, values: List[List] = None, df: pd.DataFrame = None):
         '''
@@ -73,20 +87,19 @@ class ResultSet:
     # --- converters ---
 
     def from_df(self, df, database=None, table_name=None, table_alias=None):
-
-
-
-        for i, col in enumerate(df.columns):
-            self._columns.append(Column(
-                name=col,
+        self._columns = [
+            Column(
+                name=column_name,
                 table_name=table_name,
                 table_alias=table_alias,
                 database=database,
-                type=
-            )
+                type=column_dtype
+            ) for column_name, column_dtype
+            in zip(df.columns, df.dtypes)
+        ]
 
-
-        self._df = df
+        rename_df_columns(df)
+        self._df = df
 
         return self
 
@@ -97,9 +110,6 @@
             if col.alias is not None:
                 alias_idx[col.alias] = col
 
-        # resp_dict = df.to_dict(orient='split')
-        # self._records = resp_dict['data']
-
         for col in df.columns:
             if col in col_names or strict:
                 column = col_names[col]
@@ -109,13 +119,16 @@
                 column = Column(col)
             self._columns.append(column)
 
-
+        rename_df_columns(df)
+        self._df = df
 
         return self
 
     def to_df(self):
-
-
+        columns_names = self.get_column_names()
+        df = self.get_raw_df()
+        rename_df_columns(df, columns_names)
+        return df
 
     def to_df_cols(self, prefix=''):
         # returns dataframe and dict of columns
@@ -128,7 +141,9 @@
             columns.append(name)
             col_names[name] = col
 
-
+        df = self.get_raw_df()
+        rename_df_columns(df, columns)
+        return df, col_names
 
     # --- tables ---
 
@@ -174,7 +189,7 @@
         self._columns.pop(idx)
 
        self._df.drop(idx, axis=1, inplace=True)
-
+        rename_df_columns(self._df)
 
     @property
     def columns(self):
@@ -226,7 +241,7 @@
         if len(df.columns) != len(self._columns):
             raise WrongArgumentError(f'Record length mismatch columns length: {len(df.columns)} != {len(self.columns)}')
 
-
+        rename_df_columns(df)
 
         if self._df is None:
             self._df = df
@@ -269,7 +284,7 @@
     def get_column_values(self, col_idx):
         # get by column index
         df = self.get_raw_df()
-        return list(df[col_idx])
+        return list(df[df.columns[col_idx]])
 
     def set_column_values(self, col_name, values):
         # values is one value or list of values
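The new module-level helper `rename_df_columns` is shown in full above: `ResultSet` now keeps positional integer labels on its internal frame (presumably so duplicate or unnamed result columns stay unambiguous) and restores the real names only on export in `to_df`/`to_df_cols`. A minimal standalone sketch of that behavior, with the helper copied from the hunk:

```python
from typing import List, Optional
import pandas as pd

def rename_df_columns(df: pd.DataFrame, names: Optional[List] = None) -> None:
    """Inplace rename of dataframe columns (copied from the diff above)."""
    if names is not None:
        df.columns = names
    else:
        df.columns = list(range(len(df.columns)))

# Duplicate column names are legal in pandas but ambiguous to select by label.
df = pd.DataFrame([[1, 2]], columns=['x', 'x'])

rename_df_columns(df)               # columns become [0, 1]
assert list(df.columns) == [0, 1]

# Mirrors the get_column_values fix: select by position, not by label,
# so the code keeps working regardless of what labels are currently set.
col_idx = 1
assert list(df[df.columns[col_idx]]) == [2]

rename_df_columns(df, ['a', 'b'])   # to_df() restores real names this way
assert list(df.columns) == ['a', 'b']
```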
mindsdb/api/executor/sql_query/steps/apply_predictor_step.py
@@ -213,7 +213,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
        columns = list(table_df.columns)
        for col_idx, name in cols_to_rename.items():
            columns[col_idx] = name
-        table_df =
+        table_df.columns = columns
 
        version = None
        if len(step.predictor.parts) > 1 and step.predictor.parts[-1].isdigit():
mindsdb/api/executor/sql_query/steps/join_step.py
@@ -90,15 +90,15 @@ class JoinStepCall(BaseStepCall):
        table_b, names_b = right_data.to_df_cols(prefix='B')
 
        query = f"""
-
-
-
+            SELECT * FROM table_a {join_type} table_b
+            ON {join_condition}
+        """
        resp_df, _description = query_df_with_type_infer_fallback(query, {
            'table_a': table_a,
            'table_b': table_b
        })
 
-        resp_df
+        resp_df.replace({np.nan: None}, inplace=True)
 
        names_a.update(names_b)
        data = ResultSet().from_df_cols(resp_df, col_names=names_a)
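For context, `DataFrame.replace({np.nan: None})` converts pandas' float `NaN` markers into real Python `None` values (casting affected columns to `object`), which serialize cleanly as SQL `NULL` further down the pipeline. A minimal pandas-only illustration:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, np.nan]})
df.replace({np.nan: None}, inplace=True)

# The missing entry is now a real None rather than float('nan')
print(df["a"].tolist())  # [1.0, None]
```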
mindsdb/api/executor/sql_query/steps/map_reduce_step.py
@@ -1,4 +1,3 @@
-import os
 import copy
 
 from mindsdb_sql_parser.ast import (
@@ -15,8 +14,7 @@ from mindsdb.api.executor.planner.steps import (
 
 from mindsdb.api.executor.sql_query.result_set import ResultSet
 from mindsdb.api.executor.exceptions import LogicError
-from mindsdb.utilities.
-from mindsdb.utilities.context_executor import execute_in_threads
+from mindsdb.utilities.partitioning import process_dataframe_in_partitions
 
 from .base import BaseStepCall
 
@@ -88,43 +86,12 @@ class MapReduceStepCall(BaseStepCall):
 
        df = input_data.get_raw_df()
 
-
-
-            chunk = 0
-            while chunk * partition < len(df):
-                # create results with partition
-                df1 = df.iloc[chunk * partition: (chunk + 1) * partition]
-                chunk += 1
-                yield df1, substeps, input_idx, input_columns
-
-        tasks = split_data_f(df)
-
-        # workers count
-        is_cloud = Config().get('cloud', False)
-        if is_cloud:
-            max_threads = int(os.getenv('MAX_QUERY_PARTITIONS', 10))
-        else:
-            max_threads = os.cpu_count() - 2
-
-        # don't exceed chunk_count
-        chunk_count = int(len(df) / partition)
-        max_threads = min(max_threads, chunk_count)
-
-        if max_threads < 1:
-            max_threads = 1
+        def callback(chunk):
+            return self._exec_partition(chunk, substeps, input_idx, input_columns)
 
-
-
-
-        for task in tasks:
-            sub_data = self._exec_partition(*task)
-            if sub_data:
-                data = join_query_data(data, sub_data)
-
-        else:
-            for sub_data in execute_in_threads(self._exec_partition, tasks, thread_count=max_threads):
-                if sub_data:
-                    data = join_query_data(data, sub_data)
+        for result in process_dataframe_in_partitions(df, callback, partition):
+            if result:
+                data = join_query_data(data, result)
 
        return data
 
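The chunking and threading logic deleted here moves into the new `process_dataframe_in_partitions` helper in `mindsdb/utilities/partitioning.py` (+52 lines, body not shown in this diff). Its implementation isn't visible, so the following is only a plausible single-threaded sketch of the contract the call site implies: slice the frame into `partition`-sized chunks, apply the callback to each, and yield the results (the real helper presumably also parallelizes across threads, as the deleted code did):

```python
from typing import Callable, Iterator
import pandas as pd

def process_dataframe_in_partitions(
    df: pd.DataFrame, callback: Callable, partition: int
) -> Iterator:
    """Hypothetical sketch; not the actual MindsDB implementation."""
    for start in range(0, len(df), partition):
        chunk = df.iloc[start:start + partition]
        yield callback(chunk)
```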
mindsdb/api/executor/utilities/sql.py
@@ -182,14 +182,6 @@ def query_df(df, query, session=None):
        df = df.astype({'CONNECTION_DATA': 'string'})
 
    result_df, description = query_df_with_type_infer_fallback(query_str, {'df': df}, user_functions=user_functions)
-    result_df
-
-    new_column_names = {}
-    real_column_names = [x[0] for x in description]
-    for i, duck_column_name in enumerate(result_df.columns):
-        new_column_names[duck_column_name] = real_column_names[i]
-    result_df = result_df.rename(
-        new_column_names,
-        axis='columns'
-    )
+    result_df.replace({np.nan: None}, inplace=True)
+    result_df.columns = [x[0] for x in description]
    return result_df
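The `description` used here is the DB-API style cursor description that `query_df_with_type_infer_fallback` returns alongside the frame; taking element 0 of each tuple restores the engine's real column names in a single assignment, replacing the old rename-map loop. A small self-contained illustration (the description values are made up):

```python
import pandas as pd

# A DB-API cursor description is a sequence of tuples whose first item is the name
description = [("id", "INTEGER"), ("total", "DOUBLE")]

result_df = pd.DataFrame([[1, 2.5]])   # engine-returned frame, placeholder labels
result_df.columns = [x[0] for x in description]
print(result_df.columns.tolist())      # ['id', 'total']
```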
mindsdb/api/http/namespaces/knowledge_bases.py
@@ -185,8 +185,10 @@ class KnowledgeBaseResource(Resource):
            )
 
        try:
+            kb_data = request.json['knowledge_base']
+
            # Retrieve the knowledge base table for updates
-            table = session.kb_controller.get_table(knowledge_base_name, project.id)
+            table = session.kb_controller.get_table(knowledge_base_name, project.id, params=kb_data.get('params'))
            if table is None:
                return http_error(
                    HTTPStatus.NOT_FOUND,
@@ -194,8 +196,6 @@ class KnowledgeBaseResource(Resource):
                    f'Knowledge Base with name {knowledge_base_name} does not exist'
                )
 
-            kb_data = request.json['knowledge_base']
-
            # Set up dependencies for DocumentLoader
            file_controller = FileController()
            file_splitter_config = FileSplitterConfig()
mindsdb/api/http/namespaces/sql.py
@@ -78,6 +78,7 @@ class Query(Resource):
                "error_code": 0,
                "error_message": str(e),
            }
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")
 
        except UnknownError as e:
            # unclassified
@@ -87,6 +88,7 @@ class Query(Resource):
                "error_code": 0,
                "error_message": str(e),
            }
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")
 
        except Exception as e:
            error_type = "unexpected"
@@ -95,7 +97,7 @@ class Query(Resource):
                "error_code": 0,
                "error_message": str(e),
            }
-            logger.
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")
 
        if query_response.get("type") == SQL_RESPONSE_TYPE.ERROR:
            error_type = "expected"
mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py
@@ -2,7 +2,8 @@ from mindsdb_sql_parser import parse_sql
 from mindsdb.api.executor.planner import utils as planner_utils
 
 import mindsdb.utilities.profiler as profiler
-from mindsdb.api.executor import Column
+from mindsdb.api.executor.sql_query.result_set import Column
+from mindsdb.api.executor.sql_query import SQLQuery
 from mindsdb.api.executor.command_executor import ExecuteCommands
 from mindsdb.api.mysql.mysql_proxy.utilities import ErSqlSyntaxError
 from mindsdb.utilities import log
mindsdb/api/mysql/mysql_proxy/mysql_proxy.py
@@ -83,6 +83,7 @@ from mindsdb.api.mysql.mysql_proxy.utilities.lightwood_dtype import dtype
 from mindsdb.utilities import log
 from mindsdb.utilities.config import Config
 from mindsdb.utilities.context import context as ctx
+from mindsdb.utilities.otel.metric_handlers import get_query_request_counter
 from mindsdb.utilities.wizards import make_ssl_cert
 
 logger = log.getLogger(__name__)
@@ -562,6 +563,12 @@ class MysqlProxy(SocketServer.BaseRequestHandler):
            data=executor.data,
            status=executor.server_status,
        )
+
+        # Increment the counter and include metadata in attributes
+        metadata = ctx.metadata(query=sql)
+        query_request_counter = get_query_request_counter()
+        query_request_counter.add(1, metadata)
+
        return resp
 
    def answer_stmt_prepare(self, sql):
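`get_query_request_counter` lives in the new `mindsdb/utilities/otel/metric_handlers/__init__.py` (+25 lines), whose body this diff view doesn't show. A plausible sketch using the standard OpenTelemetry metrics API, where the metric name and description are assumptions:

```python
from opentelemetry import metrics

_query_request_counter = None

def get_query_request_counter():
    """Lazily create a Counter; name/description are assumed, not from the diff."""
    global _query_request_counter
    if _query_request_counter is None:
        meter = metrics.get_meter(__name__)
        _query_request_counter = meter.create_counter(
            "mindsdb.query.requests",  # hypothetical metric name
            description="Number of SQL queries handled by the MySQL proxy",
        )
    return _query_request_counter

# Call site shape, as in the hunk above: counter.add(1, attributes_dict)
```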
mindsdb/api/postgres/postgres_proxy/executor/executor.py
@@ -6,7 +6,8 @@ from mindsdb.api.executor.planner import utils as planner_utils
 from numpy import dtype as np_dtype
 from pandas.api import types as pd_types
 
-from mindsdb.api.executor import SQLQuery
+from mindsdb.api.executor.sql_query import SQLQuery
+from mindsdb.api.executor.sql_query.result_set import Column
 from mindsdb.api.mysql.mysql_proxy.utilities.lightwood_dtype import dtype
 from mindsdb.api.executor.command_executor import ExecuteCommands
 from mindsdb.api.mysql.mysql_proxy.utilities import SqlApiException
mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py
@@ -286,7 +286,7 @@ class ChromaDBHandler(VectorStoreHandler):
        else:
            # general get query
            result = collection.get(
-                ids=id_filters,
+                ids=id_filters or None,
                where=filters,
                limit=limit,
                offset=offset,
@@ -475,7 +475,7 @@ class ChromaDBHandler(VectorStoreHandler):
        collections = self._client.list_collections()
        collections_name = pd.DataFrame(
            columns=["table_name"],
-            data=
+            data=collections,
        )
        return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_name)
 
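The `or None` guard is the notable fix here: an empty `id_filters` list presumably restricted `collection.get` to zero ids and returned nothing, whereas `None` means no id filter at all. The idiom in isolation:

```python
id_filters = []
print(id_filters or None)    # None -> no id restriction is passed to Chroma

id_filters = ["doc-1"]
print(id_filters or None)    # ['doc-1'] -> the filter applies as before
```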
mindsdb/integrations/handlers/chromadb_handler/requirements.txt
@@ -1 +1 @@
-chromadb~=0.
+chromadb~=0.6.3
mindsdb/integrations/handlers/file_handler/file_handler.py
@@ -276,7 +276,7 @@ class FileHandler(DatabaseHandler):
 
        header = df.columns.values.tolist()
 
-        df
+        df.columns = [key.strip() for key in header]
        df = df.applymap(clean_cell)
 
        header = [x.strip() for x in header]
mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py
@@ -25,6 +25,11 @@ test_file_content = [
    [3, -3, 0.3, "C"],
 ]
 
+test_excel_sheet_content = [
+    ["Sheet_Name"],
+    ["Sheet1"],
+]
+
 file_records = [("one", 1, test_file_content[0]), ("two", 2, test_file_content[0])]
 
 
@@ -349,7 +354,18 @@ def test_get_file_path_with_url(mock_fetch_url):
    ],
 )
 def test_handle_source(file_path, expected_columns):
-
+    sheet_name = None
+    # Excel files return a list of sheets when queried without a sheet name
+    if file_path.endswith(".xlsx"):
+        df, _ = FileHandler._handle_source(file_path)
+        assert isinstance(df, pandas.DataFrame)
+
+        assert df.columns.tolist() == test_excel_sheet_content[0]
+        assert len(df) == len(test_excel_sheet_content) - 1
+        assert df.values.tolist() == test_excel_sheet_content[1:]
+        sheet_name = test_excel_sheet_content[1][0]
+
+    df, _ = FileHandler._handle_source(file_path, sheet_name=sheet_name)
    assert isinstance(df, pandas.DataFrame)
    assert df.columns.tolist() == expected_columns
 
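The new test pins down a convention: calling `_handle_source` on an .xlsx file without a `sheet_name` yields a single-column `Sheet_Name` frame listing the workbook's sheets, and a second call with a chosen sheet returns that sheet's data. In plain pandas, such a sheet-list frame could be built like this (a sketch of the convention, not MindsDB's implementation):

```python
import pandas as pd

xls = pd.ExcelFile("book.xlsx")   # hypothetical workbook
sheets_df = pd.DataFrame({"Sheet_Name": xls.sheet_names})
```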
mindsdb/integrations/handlers/jira_handler/jira_handler.py
@@ -55,9 +55,23 @@ class JiraHandler(APIHandler):
            return self.connection
 
        s = requests.Session()
+        if self.connection_data.get("cloud", False):
+            params = {
+                "cloud": True,
+                "username": self.connection_data['jira_username'],
+                "password": self.connection_data['jira_api_token'],
+                "url": self.connection_data['jira_url'],
+            }
+        else:
+            params = {
+                "cloud": False,
+                "url": self.connection_data['jira_url'],
+                "session": s
+            }
+
        s.headers['Authorization'] = f"Bearer {self.connection_data['jira_api_token']}"
 
-        self.connection = Jira(
+        self.connection = Jira(**params)
        self.is_connected = True
 
 
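The `Jira` client here comes from the `atlassian-python-api` package, and the branch mirrors its two auth modes: Cloud uses basic auth (email plus API token), while Server/Data Center reuses the session carrying the bearer header. A sketch of what the two parameter sets produce (all connection values are placeholders):

```python
import requests
from atlassian import Jira

# Jira Cloud: basic auth with email + API token
cloud_client = Jira(
    url="https://example.atlassian.net",   # placeholder
    username="user@example.com",           # placeholder
    password="<api-token>",                # the API token goes in the password slot
    cloud=True,
)

# Server/Data Center: a pre-built session carrying a bearer token
s = requests.Session()
s.headers["Authorization"] = "Bearer <personal-access-token>"
dc_client = Jira(url="https://jira.example.com", session=s, cloud=False)
```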
mindsdb/integrations/handlers/jira_handler/jira_table.py
@@ -10,9 +10,26 @@ from mindsdb_sql_parser import ast
 
 logger = log.getLogger(__name__)
 
+
+def flatten_json(nested_json, parent_key="", separator="."):
+    """
+    Recursively flattens a nested JSON object into a dictionary with dot notation keys.
+    """
+    items = []
+    for k, v in nested_json.items():
+        new_key = f"{parent_key}{separator}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.extend(flatten_json(v, new_key, separator=separator).items())
+        else:
+            items.append((new_key, v))
+    return dict(items)
+
+
 class JiraProjectsTable(APITable):
    """Jira Projects Table implementation"""
+
    _MAX_API_RESULTS = 100
+
    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the Jira "get_all_project_issues" API endpoint
        Parameters
@@ -42,8 +59,8 @@ class JiraProjectsTable(APITable):
 
        for an_order in query.order_by:
            if an_order.field.parts[0] != "key":
-            continue
-            if an_order.field.parts[1] in ["reporter","assignee","status"]:
+                continue
+            if an_order.field.parts[1] in ["reporter", "assignee", "status"]:
                if issues_kwargs != {}:
                    raise ValueError(
                        "Duplicate order conditions found for reporter,status and assignee"
@@ -61,9 +78,9 @@ class JiraProjectsTable(APITable):
                raise ValueError(
                    f"Order by unknown column {an_order.field.parts[1]}"
                )
-        project = self.handler.connection_data[
+        project = self.handler.connection_data["project"]
        jira_project_df = self.call_jira_api(project)
-
+
        selected_columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
@@ -74,7 +91,6 @@ class JiraProjectsTable(APITable):
            else:
                raise ValueError(f"Unknown query target {type(target)}")
 
-
        if len(jira_project_df) == 0:
            jira_project_df = pd.DataFrame([], columns=selected_columns)
            return jira_project_df
@@ -88,7 +104,7 @@ class JiraProjectsTable(APITable):
                by=order_by_conditions["columns"],
                ascending=order_by_conditions["ascending"],
            )
-
+
        if query.limit:
            jira_project_df = jira_project_df.head(total_results)
 
@@ -102,12 +118,12 @@ class JiraProjectsTable(APITable):
            List of columns
        """
        return [
-
-
-
-
-
-
+            "key",
+            "summary",
+            "status",
+            "reporter",
+            "assignee",
+            "priority",
        ]
 
    def call_jira_api(self, project):
@@ -116,36 +132,41 @@ class JiraProjectsTable(APITable):
        max_records = jira.get_project_issues_count(project)
        max_records = 100
        jql_query = self.handler.construct_jql()
-        max_results = self._MAX_API_RESULTS
+        max_results = self._MAX_API_RESULTS
        start_index = 0
        total = 1
        fields = [
-
-
-
-
-
-
+            "key",
+            "fields.summary",
+            "fields.status.name",
+            "fields.reporter.displayName",
+            "fields.assignee.displayName",
+            "fields.priority.name",
        ]
 
        all_jira_issues_df = pd.DataFrame(columns=fields)
 
        while start_index <= total:
-            results = self.handler.connect().jql(
-
+            results = self.handler.connect().jql(
+                jql_query, start=start_index, limit=max_results
+            )
+            flattened_data = [flatten_json(item) for item in results["issues"]]
+            df = pd.DataFrame(flattened_data)
            df = df[fields]
            start_index += max_results
-            total =
+            total = results["total"]
            all_jira_issues_df = pd.concat([all_jira_issues_df, df], axis=0)
 
+        all_jira_issues_df = all_jira_issues_df.rename(
+            columns={
+                "key": "key",
+                "fields.summary": "summary",
+                "fields.reporter.displayName": "reporter",
+                "fields.assignee.displayName": "assignee",
+                "fields.priority.name": "priority",
+                "fields.status.name": "status",
+            },
+            errors="ignore",
+        )
 
-        all_jira_issues_df = all_jira_issues_df.rename(columns={
-            'key': 'key',
-            'fields.summary': 'summary',
-            'fields.reporter.name':'reporter',
-            'fields.assignee.name':'assignee',
-            'fields.priority.name':'priority',
-            'fields.status.name':'status'})
-
        return all_jira_issues_df
-
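Since `flatten_json` is shown in full above, its effect on a nested issue payload can be demonstrated directly; this is why the `fields` list can address values like `fields.status.name` as flat column names (the issue data below is illustrative):

```python
def flatten_json(nested_json, parent_key="", separator="."):
    """Copied from the hunk above."""
    items = []
    for k, v in nested_json.items():
        new_key = f"{parent_key}{separator}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_json(v, new_key, separator=separator).items())
        else:
            items.append((new_key, v))
    return dict(items)

issue = {
    "key": "PROJ-1",
    "fields": {
        "summary": "Fix login bug",
        "status": {"name": "In Progress"},
        "reporter": {"displayName": "Ada"},
    },
}
print(flatten_json(issue))
# {'key': 'PROJ-1', 'fields.summary': 'Fix login bug',
#  'fields.status.name': 'In Progress', 'fields.reporter.displayName': 'Ada'}
```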
mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py
@@ -0,0 +1,82 @@
+from typing import Any, List
+from langchain_core.embeddings import Embeddings
+import requests
+
+
+class FastAPIEmbeddings(Embeddings):
+    """An embedding extension that interfaces with FAST API. Useful for custom serving solutions."""
+
+    def __init__(
+        self,
+        api_base: str,
+        model: str,
+        batch_size: int = 32,
+        **kwargs: Any,
+    ):
+        """Initialize the embeddings class.
+
+        Args:
+            api_base: Base URL for the VLLM server
+            model: Model name/path to use for embeddings
+            batch_size: Batch size for generating embeddings
+        """
+        super().__init__()
+        self.api_base = api_base
+        self.model = model
+        self.batch_size = batch_size
+
+        # initialize requests here with the api_base
+
+    def _get_embeddings(self, texts: List[str]) -> List[str]:
+        """Get embeddings for a batch of text chunks.
+
+        Returns:
+            List of embeddings as strings. For sparse vectors, returns strings in format
+            "{key:value,...}/size" where size is the dimension of the vector space.
+        """
+
+        headers = {"accept": "application/json", "Content-Type": "application/json"}
+
+        data = {
+            "input": texts,
+            "model": self.model
+        }
+
+        response = requests.post(self.api_base, headers=headers, json=data)
+
+        response.raise_for_status()
+
+        embeddings = []
+        for response_dict in response.json()["data"]:
+            embedding = response_dict["embedding"]
+            embeddings.append(embedding)
+
+        return embeddings
+
+    def embed_documents(self, texts: List[str]) -> List[str]:
+        """Embed a list of documents using vLLM.
+
+        Args:
+            texts: List of documents to embed
+
+        Returns:
+            List of embeddings as strings, one for each document.
+            For sparse embeddings, returns strings in format "{key:value,...}/size"
+            For dense embeddings, returns JSON strings of float lists
+        """
+
+        return self._get_embeddings(texts)
+
+    def embed_query(self, text: str) -> str:
+        """Embed a single query text using vLLM.
+
+        Args:
+            text: Query text to embed
+
+        Returns:
+            Query embedding as a string.
+            For sparse embeddings, returns string in format "{key:value,...}/size"
+            For dense embeddings, returns JSON string of float list
+        """
+
+        return self._get_embeddings([text])[0]
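The request payload (`{"input": ..., "model": ...}`) and the `data[*].embedding` response field match OpenAI-compatible embedding endpoints such as vLLM's `/v1/embeddings`. A usage sketch, where the URL and model name are placeholders:

```python
from mindsdb.integrations.handlers.langchain_embedding_handler.fastapi_embeddings import FastAPIEmbeddings

embedder = FastAPIEmbeddings(
    api_base="http://localhost:8000/v1/embeddings",   # placeholder endpoint
    model="BAAI/bge-small-en-v1.5",                   # placeholder model id
)

doc_vectors = embedder.embed_documents(["hello world", "goodbye world"])
query_vector = embedder.embed_query("hello")
```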
mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py
@@ -10,6 +10,7 @@ from mindsdb.integrations.libs.base import BaseMLEngine
 from mindsdb.utilities import log
 from langchain_core.embeddings import Embeddings
 from mindsdb.integrations.handlers.langchain_embedding_handler.vllm_embeddings import VLLMEmbeddings
+from mindsdb.integrations.handlers.langchain_embedding_handler.fastapi_embeddings import FastAPIEmbeddings
 
 logger = log.getLogger(__name__)
 
@@ -20,7 +21,10 @@ logger = log.getLogger(__name__)
 # This is used for the user to select the embedding model
 EMBEDDING_MODELS = {
    'VLLM': 'VLLMEmbeddings',
-    'vllm': 'VLLMEmbeddings'
+    'vllm': 'VLLMEmbeddings',
+    'FastAPI': 'FastAPIEmbeddings',
+    'fastapi': 'FastAPIEmbeddings'
+
 }
 
 try:
@@ -55,6 +59,9 @@ def get_langchain_class(class_name: str) -> Embeddings:
    if class_name == "VLLMEmbeddings":
        return VLLMEmbeddings
 
+    if class_name == "FastAPIEmbeddings":
+        return FastAPIEmbeddings
+
    # Then try langchain_community.embeddings
    try:
        module = importlib.import_module("langchain_community.embeddings")
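Tying the registry changes together, the new engine then resolves like any other entry; a sketch using the names patched above, with placeholder constructor arguments:

```python
# EMBEDDING_MODELS and get_langchain_class as patched in this file
class_name = EMBEDDING_MODELS["fastapi"]          # -> 'FastAPIEmbeddings'
embeddings_cls = get_langchain_class(class_name)  # short-circuits to FastAPIEmbeddings
embedder = embeddings_cls(
    api_base="http://localhost:8000/v1/embeddings",  # placeholder
    model="my-embedding-model",                      # placeholder
)
```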