MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.
Files changed (77)
  1. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
  2. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +5 -3
  5. mindsdb/api/executor/__init__.py +0 -1
  6. mindsdb/api/executor/command_executor.py +2 -1
  7. mindsdb/api/executor/data_types/answer.py +1 -1
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
  9. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
  10. mindsdb/api/executor/sql_query/__init__.py +1 -0
  11. mindsdb/api/executor/sql_query/result_set.py +36 -21
  12. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
  13. mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
  14. mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
  15. mindsdb/api/executor/utilities/sql.py +2 -10
  16. mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
  17. mindsdb/api/http/namespaces/sql.py +3 -1
  18. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
  19. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
  20. mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
  21. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
  22. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
  24. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
  25. mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
  26. mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
  27. mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
  28. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
  29. mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
  30. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
  31. mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
  32. mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
  33. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
  34. mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
  35. mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
  36. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
  37. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
  38. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
  39. mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
  40. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
  41. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
  42. mindsdb/integrations/utilities/rag/settings.py +8 -2
  43. mindsdb/integrations/utilities/sql_utils.py +1 -1
  44. mindsdb/interfaces/agents/agents_controller.py +3 -5
  45. mindsdb/interfaces/agents/langchain_agent.py +112 -150
  46. mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
  47. mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
  48. mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
  49. mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
  50. mindsdb/interfaces/chatbot/memory.py +58 -13
  51. mindsdb/interfaces/database/projects.py +17 -15
  52. mindsdb/interfaces/database/views.py +12 -25
  53. mindsdb/interfaces/knowledge_base/controller.py +39 -15
  54. mindsdb/interfaces/model/functions.py +15 -4
  55. mindsdb/interfaces/model/model_controller.py +4 -7
  56. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
  57. mindsdb/interfaces/skills/retrieval_tool.py +10 -3
  58. mindsdb/interfaces/skills/skill_tool.py +97 -53
  59. mindsdb/interfaces/skills/sql_agent.py +77 -36
  60. mindsdb/interfaces/storage/db.py +1 -1
  61. mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
  62. mindsdb/utilities/cache.py +7 -4
  63. mindsdb/utilities/context.py +11 -1
  64. mindsdb/utilities/langfuse.py +264 -0
  65. mindsdb/utilities/log.py +20 -2
  66. mindsdb/utilities/otel/__init__.py +206 -0
  67. mindsdb/utilities/otel/logger.py +25 -0
  68. mindsdb/utilities/otel/meter.py +19 -0
  69. mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
  70. mindsdb/utilities/otel/tracer.py +16 -0
  71. mindsdb/utilities/partitioning.py +52 -0
  72. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  73. mindsdb/utilities/utils.py +34 -0
  74. mindsdb/utilities/otel.py +0 -72
  75. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
  76. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
  77. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0

mindsdb/api/executor/sql_query/result_set.py

@@ -1,5 +1,6 @@
-from typing import List
 import copy
+from typing import List, Optional
+
 import numpy as np
 import pandas as pd
 
@@ -35,6 +36,19 @@ class Column:
         return f'{self.__class__.__name__}({self.__dict__})'
 
 
+def rename_df_columns(df: pd.DataFrame, names: Optional[List] = None) -> None:
+    """Inplace rename of dataframe columns
+
+    Args:
+        df (pd.DataFrame): dataframe
+        names (Optional[List]): columns names to set
+    """
+    if names is not None:
+        df.columns = names
+    else:
+        df.columns = list(range(len(df.columns)))
+
+
 class ResultSet:
     def __init__(self, columns=None, values: List[List] = None, df: pd.DataFrame = None):
         '''
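
The helper above replaces the `df.set_axis(...)` calls scattered through `ResultSet`. The practical difference: assigning to `df.columns` relabels the frame in place, while `set_axis(..., axis=1)` returns a relabeled copy. A minimal pandas illustration (not from the diff):

    import pandas as pd

    df = pd.DataFrame({"a": [1], "b": [2]})

    relabeled = df.set_axis(["x", "y"], axis=1)  # new frame; df still has "a", "b"
    df.columns = ["x", "y"]                      # mutates df directly, no copy
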
@@ -73,20 +87,19 @@ class ResultSet:
     # --- converters ---
 
     def from_df(self, df, database=None, table_name=None, table_alias=None):
-
-        columns_dtypes = list(df.dtypes)
-
-        for i, col in enumerate(df.columns):
-            self._columns.append(Column(
-                name=col,
+        self._columns = [
+            Column(
+                name=column_name,
                 table_name=table_name,
                 table_alias=table_alias,
                 database=database,
-                type=columns_dtypes[i]
-            ))
+                type=column_dtype
+            ) for column_name, column_dtype
+            in zip(df.columns, df.dtypes)
+        ]
 
-        # rename columns to indexes
-        self._df = df.set_axis(range(len(df.columns)), axis=1)
+        rename_df_columns(df)
+        self._df = df
 
         return self
 
@@ -97,9 +110,6 @@ class ResultSet:
             if col.alias is not None:
                 alias_idx[col.alias] = col
 
-        # resp_dict = df.to_dict(orient='split')
-        # self._records = resp_dict['data']
-
        for col in df.columns:
            if col in col_names or strict:
                column = col_names[col]
@@ -109,13 +119,16 @@ class ResultSet:
                column = Column(col)
            self._columns.append(column)
 
-        self._df = df.set_axis(range(len(df.columns)), axis=1)
+        rename_df_columns(df)
+        self._df = df
 
        return self
 
    def to_df(self):
-        columns = self.get_column_names()
-        return self.get_raw_df().set_axis(columns, axis=1)
+        columns_names = self.get_column_names()
+        df = self.get_raw_df()
+        rename_df_columns(df, columns_names)
+        return df
 
    def to_df_cols(self, prefix=''):
        # returns dataframe and dict of columns
@@ -128,7 +141,9 @@ class ResultSet:
            columns.append(name)
            col_names[name] = col
 
-        return self.get_raw_df().set_axis(columns, axis=1), col_names
+        df = self.get_raw_df()
+        rename_df_columns(df, columns)
+        return df, col_names
 
    # --- tables ---
 
@@ -174,7 +189,7 @@ class ResultSet:
        self._columns.pop(idx)
 
        self._df.drop(idx, axis=1, inplace=True)
-        self._df = self._df.set_axis(range(len(self._df.columns)), axis=1)
+        rename_df_columns(self._df)
 
    @property
    def columns(self):
@@ -226,7 +241,7 @@ class ResultSet:
        if len(df.columns) != len(self._columns):
            raise WrongArgumentError(f'Record length mismatch columns length: {len(df.columns)} != {len(self.columns)}')
 
-        df = df.set_axis(range(len(df.columns)), axis=1)
+        rename_df_columns(df)
 
        if self._df is None:
            self._df = df
@@ -269,7 +284,7 @@ class ResultSet:
    def get_column_values(self, col_idx):
        # get by column index
        df = self.get_raw_df()
-        return list(df[col_idx])
+        return list(df[df.columns[col_idx]])
 
    def set_column_values(self, col_name, values):
        # values is one value or list of values
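
The `get_column_values` fix matters once the frame's labels are no longer the positional RangeIndex: `df[col_idx]` selects by column label, which only works while the labels happen to be the integers 0..n-1, whereas `df.columns[col_idx]` is genuinely positional. For example:

    import pandas as pd

    df = pd.DataFrame([[1, "x"]], columns=["id", "name"])

    # df[0] raises KeyError: the labels are strings, not positions
    first_col = list(df[df.columns[0]])  # positional lookup -> [1]
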
mindsdb/api/executor/sql_query/steps/apply_predictor_step.py

@@ -213,7 +213,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
         columns = list(table_df.columns)
         for col_idx, name in cols_to_rename.items():
             columns[col_idx] = name
-        table_df = table_df.set_axis(columns, axis=1)
+        table_df.columns = columns
 
         version = None
         if len(step.predictor.parts) > 1 and step.predictor.parts[-1].isdigit():
mindsdb/api/executor/sql_query/steps/join_step.py

@@ -90,15 +90,15 @@ class JoinStepCall(BaseStepCall):
         table_b, names_b = right_data.to_df_cols(prefix='B')
 
         query = f"""
-            SELECT * FROM table_a {join_type} table_b
-            ON {join_condition}
-        """
+        SELECT * FROM table_a {join_type} table_b
+        ON {join_condition}
+        """
         resp_df, _description = query_df_with_type_infer_fallback(query, {
             'table_a': table_a,
             'table_b': table_b
         })
 
-        resp_df = resp_df.replace({np.nan: None})
+        resp_df.replace({np.nan: None}, inplace=True)
 
         names_a.update(names_b)
         data = ResultSet().from_df_cols(resp_df, col_names=names_a)
mindsdb/api/executor/sql_query/steps/map_reduce_step.py

@@ -1,4 +1,3 @@
-import os
 import copy
 
 from mindsdb_sql_parser.ast import (
@@ -15,8 +14,7 @@ from mindsdb.api.executor.planner.steps import (
 
 from mindsdb.api.executor.sql_query.result_set import ResultSet
 from mindsdb.api.executor.exceptions import LogicError
-from mindsdb.utilities.config import Config
-from mindsdb.utilities.context_executor import execute_in_threads
+from mindsdb.utilities.partitioning import process_dataframe_in_partitions
 
 from .base import BaseStepCall
 
@@ -88,43 +86,12 @@ class MapReduceStepCall(BaseStepCall):
 
         df = input_data.get_raw_df()
 
-        # tasks
-        def split_data_f(df):
-            chunk = 0
-            while chunk * partition < len(df):
-                # create results with partition
-                df1 = df.iloc[chunk * partition: (chunk + 1) * partition]
-                chunk += 1
-                yield df1, substeps, input_idx, input_columns
-
-        tasks = split_data_f(df)
-
-        # workers count
-        is_cloud = Config().get('cloud', False)
-        if is_cloud:
-            max_threads = int(os.getenv('MAX_QUERY_PARTITIONS', 10))
-        else:
-            max_threads = os.cpu_count() - 2
-
-        # don't exceed chunk_count
-        chunk_count = int(len(df) / partition)
-        max_threads = min(max_threads, chunk_count)
-
-        if max_threads < 1:
-            max_threads = 1
+        def callback(chunk):
+            return self._exec_partition(chunk, substeps, input_idx, input_columns)
 
-        if max_threads == 1:
-            # don't spawn threads
-
-            for task in tasks:
-                sub_data = self._exec_partition(*task)
-                if sub_data:
-                    data = join_query_data(data, sub_data)
-
-        else:
-            for sub_data in execute_in_threads(self._exec_partition, tasks, thread_count=max_threads):
-                if sub_data:
-                    data = join_query_data(data, sub_data)
+        for result in process_dataframe_in_partitions(df, callback, partition):
+            if result:
+                data = join_query_data(data, result)
 
         return data
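
The chunking and worker-count bookkeeping removed here moves into the new mindsdb/utilities/partitioning.py (+52 lines, not shown in this diff). Based only on the call site, a minimal sketch of the helper's likely shape; the real module presumably also keeps the threading behavior deleted above, so treat the signature and body as assumptions:

    from typing import Any, Callable, Iterator

    import pandas as pd

    def process_dataframe_in_partitions(df: pd.DataFrame,
                                        callback: Callable[[pd.DataFrame], Any],
                                        partition_size: int) -> Iterator[Any]:
        # Yield callback(chunk) for each partition_size-row slice of df.
        for start in range(0, len(df), partition_size):
            yield callback(df.iloc[start:start + partition_size])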
 
mindsdb/api/executor/utilities/sql.py

@@ -182,14 +182,6 @@ def query_df(df, query, session=None):
         df = df.astype({'CONNECTION_DATA': 'string'})
 
     result_df, description = query_df_with_type_infer_fallback(query_str, {'df': df}, user_functions=user_functions)
-    result_df = result_df.replace({np.nan: None})
-
-    new_column_names = {}
-    real_column_names = [x[0] for x in description]
-    for i, duck_column_name in enumerate(result_df.columns):
-        new_column_names[duck_column_name] = real_column_names[i]
-    result_df = result_df.rename(
-        new_column_names,
-        axis='columns'
-    )
+    result_df.replace({np.nan: None}, inplace=True)
+    result_df.columns = [x[0] for x in description]
     return result_df

mindsdb/api/http/namespaces/knowledge_bases.py

@@ -185,8 +185,10 @@ class KnowledgeBaseResource(Resource):
             )
 
         try:
+            kb_data = request.json['knowledge_base']
+
             # Retrieve the knowledge base table for updates
-            table = session.kb_controller.get_table(knowledge_base_name, project.id)
+            table = session.kb_controller.get_table(knowledge_base_name, project.id, params=kb_data.get('params'))
             if table is None:
                 return http_error(
                     HTTPStatus.NOT_FOUND,
@@ -194,8 +196,6 @@ class KnowledgeBaseResource(Resource):
                     f'Knowledge Base with name {knowledge_base_name} does not exist'
                 )
 
-            kb_data = request.json['knowledge_base']
-
             # Set up dependencies for DocumentLoader
             file_controller = FileController()
             file_splitter_config = FileSplitterConfig()

mindsdb/api/http/namespaces/sql.py

@@ -78,6 +78,7 @@ class Query(Resource):
                 "error_code": 0,
                 "error_message": str(e),
             }
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")
 
         except UnknownError as e:
             # unclassified
@@ -87,6 +88,7 @@ class Query(Resource):
                 "error_code": 0,
                 "error_message": str(e),
             }
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")
 
         except Exception as e:
             error_type = "unexpected"
@@ -95,7 +97,7 @@ class Query(Resource):
                 "error_code": 0,
                 "error_message": str(e),
             }
-            logger.debug(f"Error query processing: \n{traceback.format_exc()}")
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")
 
         if query_response.get("type") == SQL_RESPONSE_TYPE.ERROR:
             error_type = "expected"

mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py

@@ -2,7 +2,8 @@ from mindsdb_sql_parser import parse_sql
 from mindsdb.api.executor.planner import utils as planner_utils
 
 import mindsdb.utilities.profiler as profiler
-from mindsdb.api.executor import Column, SQLQuery
+from mindsdb.api.executor.sql_query.result_set import Column
+from mindsdb.api.executor.sql_query import SQLQuery
 from mindsdb.api.executor.command_executor import ExecuteCommands
 from mindsdb.api.mysql.mysql_proxy.utilities import ErSqlSyntaxError
 from mindsdb.utilities import log

mindsdb/api/mysql/mysql_proxy/mysql_proxy.py

@@ -83,6 +83,7 @@ from mindsdb.api.mysql.mysql_proxy.utilities.lightwood_dtype import dtype
 from mindsdb.utilities import log
 from mindsdb.utilities.config import Config
 from mindsdb.utilities.context import context as ctx
+from mindsdb.utilities.otel.metric_handlers import get_query_request_counter
 from mindsdb.utilities.wizards import make_ssl_cert
 
 logger = log.getLogger(__name__)
@@ -562,6 +563,12 @@ class MysqlProxy(SocketServer.BaseRequestHandler):
             data=executor.data,
             status=executor.server_status,
         )
+
+        # Increment the counter and include metadata in attributes
+        metadata = ctx.metadata(query=sql)
+        query_request_counter = get_query_request_counter()
+        query_request_counter.add(1, metadata)
+
         return resp
 
     def answer_stmt_prepare(self, sql):
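
`get_query_request_counter` comes from the new mindsdb/utilities/otel/metric_handlers package (+25 lines, also not shown in this diff). Assuming it wraps the standard OpenTelemetry metrics API, a sketch of such a handler; the metric name is illustrative, not taken from the package:

    from opentelemetry import metrics

    _query_request_counter = None

    def get_query_request_counter():
        # Lazily create a Counter incremented once per SQL query request.
        global _query_request_counter
        if _query_request_counter is None:
            meter = metrics.get_meter(__name__)
            _query_request_counter = meter.create_counter("mindsdb_query_requests")  # assumed name
        return _query_request_counter

`Counter.add(value, attributes)` accepts a dict of attributes, which is how the proxy attaches `ctx.metadata(query=sql)` to each increment.
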
mindsdb/api/postgres/postgres_proxy/executor/executor.py

@@ -6,7 +6,8 @@ from mindsdb.api.executor.planner import utils as planner_utils
 from numpy import dtype as np_dtype
 from pandas.api import types as pd_types
 
-from mindsdb.api.executor import SQLQuery, Column
+from mindsdb.api.executor.sql_query import SQLQuery
+from mindsdb.api.executor.sql_query.result_set import Column
 from mindsdb.api.mysql.mysql_proxy.utilities.lightwood_dtype import dtype
 from mindsdb.api.executor.command_executor import ExecuteCommands
 from mindsdb.api.mysql.mysql_proxy.utilities import SqlApiException

mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py

@@ -286,7 +286,7 @@ class ChromaDBHandler(VectorStoreHandler):
         else:
             # general get query
             result = collection.get(
-                ids=id_filters,
+                ids=id_filters or None,
                 where=filters,
                 limit=limit,
                 offset=offset,
@@ -475,7 +475,7 @@ class ChromaDBHandler(VectorStoreHandler):
         collections = self._client.list_collections()
         collections_name = pd.DataFrame(
             columns=["table_name"],
-            data=[collection.name for collection in collections],
+            data=collections,
         )
         return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_name)
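
Both edits appear to track the chromadb 0.6 API (see the pinned chromadb~=0.6.3 below): `list_collections()` now returns collection names rather than `Collection` objects, so the list feeds the DataFrame directly, and `id_filters or None` presumably keeps an empty id list from being treated as a filter that matches nothing. Roughly:

    collections = client.list_collections()   # chromadb>=0.6: ["tbl_a", "tbl_b"]
    ids = []                                   # no id constraints in the query
    result = collection.get(ids=ids or None)   # None: skip id filtering entirely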
 
mindsdb/integrations/handlers/chromadb_handler/requirements.txt

@@ -1 +1 @@
-chromadb~=0.4.8
+chromadb~=0.6.3

mindsdb/integrations/handlers/file_handler/file_handler.py

@@ -276,7 +276,7 @@ class FileHandler(DatabaseHandler):
 
         header = df.columns.values.tolist()
 
-        df = df.rename(columns={key: key.strip() for key in header})
+        df.columns = [key.strip() for key in header]
         df = df.applymap(clean_cell)
 
         header = [x.strip() for x in header]

mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py

@@ -25,6 +25,11 @@ test_file_content = [
     [3, -3, 0.3, "C"],
 ]
 
+test_excel_sheet_content = [
+    ["Sheet_Name"],
+    ["Sheet1"],
+]
+
 file_records = [("one", 1, test_file_content[0]), ("two", 2, test_file_content[0])]
 
 
@@ -349,7 +354,18 @@ def test_get_file_path_with_url(mock_fetch_url):
     ],
 )
 def test_handle_source(file_path, expected_columns):
-    df, col_map = FileHandler._handle_source(file_path)
+    sheet_name = None
+    # Excel files return a list of sheets when queried without a sheet name
+    if file_path.endswith(".xlsx"):
+        df, _ = FileHandler._handle_source(file_path)
+        assert isinstance(df, pandas.DataFrame)
+
+        assert df.columns.tolist() == test_excel_sheet_content[0]
+        assert len(df) == len(test_excel_sheet_content) - 1
+        assert df.values.tolist() == test_excel_sheet_content[1:]
+        sheet_name = test_excel_sheet_content[1][0]
+
+    df, _ = FileHandler._handle_source(file_path, sheet_name=sheet_name)
     assert isinstance(df, pandas.DataFrame)
     assert df.columns.tolist() == expected_columns
 
mindsdb/integrations/handlers/jira_handler/jira_handler.py

@@ -55,9 +55,23 @@ class JiraHandler(APIHandler):
             return self.connection
 
         s = requests.Session()
+        if self.connection_data.get("cloud", False):
+            params = {
+                "cloud": True,
+                "username": self.connection_data['jira_username'],
+                "password": self.connection_data['jira_api_token'],
+                "url": self.connection_data['jira_url'],
+            }
+        else:
+            params = {
+                "cloud": False,
+                "url": self.connection_data['jira_url'],
+                "session": s
+            }
+
         s.headers['Authorization'] = f"Bearer {self.connection_data['jira_api_token']}"
 
-        self.connection = Jira(url= self.connection_data['jira_url'], session=s)
+        self.connection = Jira(**params)
         self.is_connected = True
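
The two parameter sets mirror how the atlassian-python-api client authenticates against each deployment type: Jira Cloud uses basic auth with the username plus API token as the password (and `cloud=True`), while Server/Data Center keeps the bearer token on the injected `requests.Session`. A hypothetical connection showing both modes (all values are placeholders):

    import requests
    from atlassian import Jira

    # Cloud
    jira = Jira(url="https://example.atlassian.net",
                username="me@example.com", password="<api-token>", cloud=True)

    # Server / Data Center: personal access token as a bearer header
    s = requests.Session()
    s.headers["Authorization"] = "Bearer <personal-access-token>"
    jira = Jira(url="https://jira.example.com", session=s, cloud=False)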
 
mindsdb/integrations/handlers/jira_handler/jira_table.py

@@ -10,9 +10,26 @@ from mindsdb_sql_parser import ast
 
 logger = log.getLogger(__name__)
 
+
+def flatten_json(nested_json, parent_key="", separator="."):
+    """
+    Recursively flattens a nested JSON object into a dictionary with dot notation keys.
+    """
+    items = []
+    for k, v in nested_json.items():
+        new_key = f"{parent_key}{separator}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.extend(flatten_json(v, new_key, separator=separator).items())
+        else:
+            items.append((new_key, v))
+    return dict(items)
+
+
 class JiraProjectsTable(APITable):
     """Jira Projects Table implementation"""
+
     _MAX_API_RESULTS = 100
+
     def select(self, query: ast.Select) -> pd.DataFrame:
         """Pulls data from the Jira "get_all_project_issues" API endpoint
         Parameters
@@ -42,8 +59,8 @@ class JiraProjectsTable(APITable):
 
         for an_order in query.order_by:
             if an_order.field.parts[0] != "key":
-                    continue
-            if an_order.field.parts[1] in ["reporter","assignee","status"]:
+                continue
+            if an_order.field.parts[1] in ["reporter", "assignee", "status"]:
                 if issues_kwargs != {}:
                     raise ValueError(
                         "Duplicate order conditions found for reporter,status and assignee"
@@ -61,9 +78,9 @@ class JiraProjectsTable(APITable):
                 raise ValueError(
                     f"Order by unknown column {an_order.field.parts[1]}"
                 )
-        project = self.handler.connection_data['project']
+        project = self.handler.connection_data["project"]
         jira_project_df = self.call_jira_api(project)
-
+
         selected_columns = []
         for target in query.targets:
             if isinstance(target, ast.Star):
@@ -74,7 +91,6 @@ class JiraProjectsTable(APITable):
             else:
                 raise ValueError(f"Unknown query target {type(target)}")
 
-
         if len(jira_project_df) == 0:
             jira_project_df = pd.DataFrame([], columns=selected_columns)
             return jira_project_df
@@ -88,7 +104,7 @@ class JiraProjectsTable(APITable):
                 by=order_by_conditions["columns"],
                 ascending=order_by_conditions["ascending"],
             )
-
+
         if query.limit:
             jira_project_df = jira_project_df.head(total_results)
 
@@ -102,12 +118,12 @@ class JiraProjectsTable(APITable):
             List of columns
         """
         return [
-            'key',
-            'summary',
-            'status',
-            'reporter',
-            'assignee',
-            'priority',
+            "key",
+            "summary",
+            "status",
+            "reporter",
+            "assignee",
+            "priority",
         ]
 
     def call_jira_api(self, project):
@@ -116,36 +132,41 @@ class JiraProjectsTable(APITable):
         max_records = jira.get_project_issues_count(project)
         max_records = 100
         jql_query = self.handler.construct_jql()
-        max_results = self._MAX_API_RESULTS 
+        max_results = self._MAX_API_RESULTS
         start_index = 0
         total = 1
         fields = [
-            'key',
-            'fields.summary',
-            'fields.status.name',
-            'fields.reporter.name',
-            'fields.assignee.name',
-            'fields.priority.name',
+            "key",
+            "fields.summary",
+            "fields.status.name",
+            "fields.reporter.displayName",
+            "fields.assignee.displayName",
+            "fields.priority.name",
         ]
 
         all_jira_issues_df = pd.DataFrame(columns=fields)
 
         while start_index <= total:
-            results = self.handler.connect().jql(jql_query,start=start_index, limit=max_results)
-            df = pd.json_normalize(results['issues'])
+            results = self.handler.connect().jql(
+                jql_query, start=start_index, limit=max_results
+            )
+            flattened_data = [flatten_json(item) for item in results["issues"]]
+            df = pd.DataFrame(flattened_data)
             df = df[fields]
             start_index += max_results
-            total = max_records
+            total = results["total"]
             all_jira_issues_df = pd.concat([all_jira_issues_df, df], axis=0)
 
+        all_jira_issues_df = all_jira_issues_df.rename(
+            columns={
+                "key": "key",
+                "fields.summary": "summary",
+                "fields.reporter.displayName": "reporter",
+                "fields.assignee.displayName": "assignee",
+                "fields.priority.name": "priority",
+                "fields.status.name": "status",
+            },
+            errors="ignore",
+        )
 
-        all_jira_issues_df = all_jira_issues_df.rename(columns={
-            'key': 'key',
-            'fields.summary': 'summary',
-            'fields.reporter.name':'reporter',
-            'fields.assignee.name':'assignee',
-            'fields.priority.name':'priority',
-            'fields.status.name':'status'})
-
         return all_jira_issues_df
-
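
`flatten_json` reproduces the dot-notation keys that `pd.json_normalize` produced, one plain dict per issue, which keeps the later `df[fields]` selection straightforward. For instance:

    issue = {"key": "PROJ-1",
             "fields": {"summary": "Fix login", "status": {"name": "Open"}}}

    flatten_json(issue)
    # {'key': 'PROJ-1', 'fields.summary': 'Fix login', 'fields.status.name': 'Open'}

Note it only recurses into dicts; list values (Jira labels, components, and the like) are kept as-is.
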
mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py

@@ -0,0 +1,82 @@
+from typing import Any, List
+from langchain_core.embeddings import Embeddings
+import requests
+
+
+class FastAPIEmbeddings(Embeddings):
+    """An embedding extension that interfaces with FAST API. Useful for custom serving solutions."""
+
+    def __init__(
+        self,
+        api_base: str,
+        model: str,
+        batch_size: int = 32,
+        **kwargs: Any,
+    ):
+        """Initialize the embeddings class.
+
+        Args:
+            api_base: Base URL for the VLLM server
+            model: Model name/path to use for embeddings
+            batch_size: Batch size for generating embeddings
+        """
+        super().__init__()
+        self.api_base = api_base
+        self.model = model
+        self.batch_size = batch_size
+
+        # initialize requests here with the api_base
+
+    def _get_embeddings(self, texts: List[str]) -> List[str]:
+        """Get embeddings for a batch of text chunks.
+
+        Returns:
+            List of embeddings as strings. For sparse vectors, returns strings in format
+            "{key:value,...}/size" where size is the dimension of the vector space.
+        """
+
+        headers = {"accept": "application/json", "Content-Type": "application/json"}
+
+        data = {
+            "input": texts,
+            "model": self.model
+        }
+
+        response = requests.post(self.api_base, headers=headers, json=data)
+
+        response.raise_for_status()
+
+        embeddings = []
+        for response_dict in response.json()["data"]:
+            embedding = response_dict["embedding"]
+            embeddings.append(embedding)
+
+        return embeddings
+
+    def embed_documents(self, texts: List[str]) -> List[str]:
+        """Embed a list of documents using vLLM.
+
+        Args:
+            texts: List of documents to embed
+
+        Returns:
+            List of embeddings as strings, one for each document.
+            For sparse embeddings, returns strings in format "{key:value,...}/size"
+            For dense embeddings, returns JSON strings of float lists
+        """
+
+        return self._get_embeddings(texts)
+
+    def embed_query(self, text: str) -> str:
+        """Embed a single query text using vLLM.
+
+        Args:
+            text: Query text to embed
+
+        Returns:
+            Query embedding as a string.
+            For sparse embeddings, returns string in format "{key:value,...}/size"
+            For dense embeddings, returns JSON string of float list
+        """
+
+        return self._get_embeddings([text])[0]
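
`FastAPIEmbeddings` posts an OpenAI-style payload (`{"input": [...], "model": ...}`) to `api_base` and reads each vector out of `data[i]["embedding"]` in the JSON response, so any server exposing that response shape should work. A hypothetical usage (URL and model name are placeholders):

    emb = FastAPIEmbeddings(api_base="http://localhost:8000/v1/embeddings",
                            model="BAAI/bge-small-en-v1.5")

    doc_vectors = emb.embed_documents(["first chunk", "second chunk"])
    query_vector = emb.embed_query("what is mindsdb?")

Note that `batch_size` is stored but unused: `_get_embeddings` sends all texts in a single request.
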
mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py

@@ -10,6 +10,7 @@ from mindsdb.integrations.libs.base import BaseMLEngine
 from mindsdb.utilities import log
 from langchain_core.embeddings import Embeddings
 from mindsdb.integrations.handlers.langchain_embedding_handler.vllm_embeddings import VLLMEmbeddings
+from mindsdb.integrations.handlers.langchain_embedding_handler.fastapi_embeddings import FastAPIEmbeddings
 
 logger = log.getLogger(__name__)
 
@@ -20,7 +21,10 @@ logger = log.getLogger(__name__)
 # This is used for the user to select the embedding model
 EMBEDDING_MODELS = {
     'VLLM': 'VLLMEmbeddings',
-    'vllm': 'VLLMEmbeddings'
+    'vllm': 'VLLMEmbeddings',
+    'FastAPI': 'FastAPIEmbeddings',
+    'fastapi': 'FastAPIEmbeddings'
+
 }
 
 try:
@@ -55,6 +59,9 @@ def get_langchain_class(class_name: str) -> Embeddings:
     if class_name == "VLLMEmbeddings":
         return VLLMEmbeddings
 
+    if class_name == "FastAPIEmbeddings":
+        return FastAPIEmbeddings
+
     # Then try langchain_community.embeddings
     try:
         module = importlib.import_module("langchain_community.embeddings")
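
With the alias table and the early return above, either spelling resolves to the new class before the handler falls back to langchain_community.embeddings; for example (placeholder arguments):

    cls = get_langchain_class(EMBEDDING_MODELS["fastapi"])  # -> FastAPIEmbeddings
    embeddings = cls(api_base="http://localhost:8000/v1/embeddings", model="my-model")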