MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic.

Files changed (77)
  1. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
  2. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +5 -3
  5. mindsdb/api/executor/__init__.py +0 -1
  6. mindsdb/api/executor/command_executor.py +2 -1
  7. mindsdb/api/executor/data_types/answer.py +1 -1
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
  9. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
  10. mindsdb/api/executor/sql_query/__init__.py +1 -0
  11. mindsdb/api/executor/sql_query/result_set.py +36 -21
  12. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
  13. mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
  14. mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
  15. mindsdb/api/executor/utilities/sql.py +2 -10
  16. mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
  17. mindsdb/api/http/namespaces/sql.py +3 -1
  18. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
  19. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
  20. mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
  21. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
  22. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
  24. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
  25. mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
  26. mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
  27. mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
  28. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
  29. mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
  30. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
  31. mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
  32. mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
  33. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
  34. mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
  35. mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
  36. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
  37. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
  38. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
  39. mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
  40. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
  41. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
  42. mindsdb/integrations/utilities/rag/settings.py +8 -2
  43. mindsdb/integrations/utilities/sql_utils.py +1 -1
  44. mindsdb/interfaces/agents/agents_controller.py +3 -5
  45. mindsdb/interfaces/agents/langchain_agent.py +112 -150
  46. mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
  47. mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
  48. mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
  49. mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
  50. mindsdb/interfaces/chatbot/memory.py +58 -13
  51. mindsdb/interfaces/database/projects.py +17 -15
  52. mindsdb/interfaces/database/views.py +12 -25
  53. mindsdb/interfaces/knowledge_base/controller.py +39 -15
  54. mindsdb/interfaces/model/functions.py +15 -4
  55. mindsdb/interfaces/model/model_controller.py +4 -7
  56. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
  57. mindsdb/interfaces/skills/retrieval_tool.py +10 -3
  58. mindsdb/interfaces/skills/skill_tool.py +97 -53
  59. mindsdb/interfaces/skills/sql_agent.py +77 -36
  60. mindsdb/interfaces/storage/db.py +1 -1
  61. mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
  62. mindsdb/utilities/cache.py +7 -4
  63. mindsdb/utilities/context.py +11 -1
  64. mindsdb/utilities/langfuse.py +264 -0
  65. mindsdb/utilities/log.py +20 -2
  66. mindsdb/utilities/otel/__init__.py +206 -0
  67. mindsdb/utilities/otel/logger.py +25 -0
  68. mindsdb/utilities/otel/meter.py +19 -0
  69. mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
  70. mindsdb/utilities/otel/tracer.py +16 -0
  71. mindsdb/utilities/partitioning.py +52 -0
  72. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  73. mindsdb/utilities/utils.py +34 -0
  74. mindsdb/utilities/otel.py +0 -72
  75. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
  76. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
  77. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/langchain_handler/requirements.txt

@@ -3,6 +3,6 @@ wikipedia==1.4.0
  tiktoken
  anthropic>=0.26.1
  litellm==1.44.8
- chromadb # Knowledge bases.
+ chromadb~=0.6.3 # Knowledge bases.
  -r mindsdb/integrations/handlers/openai_handler/requirements.txt
  -r mindsdb/integrations/handlers/langchain_embedding_handler/requirements.txt
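For context on the change above: ~=0.6.3 is a compatible-release specifier, so pip accepts any 0.6.x at or above 0.6.3 but not 0.7.0. A quick sketch using the packaging library (not part of this diff) to check what the pin admits:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet("~=0.6.3")   # equivalent to >=0.6.3,<0.7.0
    print(spec.contains("0.6.9"))    # True
    print(spec.contains("0.7.0"))    # False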
mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py

@@ -37,6 +37,11 @@ class PgVectorHandler(VectorStoreHandler, PostgresHandler):
          super().__init__(name=name, **kwargs)
          self._is_shared_db = False
          self._is_vector_registered = False
+         # we get these from the connection args on PostgresHandler parent
+         self._is_sparse = self.connection_args.get('is_sparse', False)
+         self._vector_size = self.connection_args.get('vector_size', None)
+         if self._is_sparse and not self._vector_size:
+             raise ValueError("vector_size is required when is_sparse=True")
          self.connect()

      def _make_connection_args(self):
@@ -190,13 +195,30 @@ class PgVectorHandler(VectorStoreHandler, PostgresHandler):
          if filter_conditions:

              if embedding_search:
-                 # if search vector, return similar rows, apply other filters after if any
                  search_vector = filter_conditions["embeddings"]["value"][0]
                  filter_conditions.pop("embeddings")
-                 return f"SELECT {targets} FROM {table_name} ORDER BY embeddings <=> '{search_vector}' {after_from_clause}"
+
+                 if self._is_sparse:
+                     # Convert dict to sparse vector if needed
+                     if isinstance(search_vector, dict):
+                         from pgvector.utils import SparseVector
+                         embedding = SparseVector(search_vector, self._vector_size)
+                         search_vector = embedding.to_text()
+                     # Use inner product for sparse vectors
+                     distance_op = "<#>"
+                 else:
+                     # Convert list to vector string if needed
+                     if isinstance(search_vector, list):
+                         search_vector = f"[{','.join(str(x) for x in search_vector)}]"
+                     # Use cosine similarity for dense vectors
+                     distance_op = "<=>"
+
+                 return f"SELECT {targets} FROM {table_name} ORDER BY embeddings {distance_op} '{search_vector}' ASC {after_from_clause}"
+
              else:
-                 # if filter conditions, return filtered rows
+                 # if filter conditions, return rows that satisfy the conditions
                  return f"SELECT {targets} FROM {table_name} {after_from_clause}"
+
          else:
              # if no filter conditions, return all rows
              return f"SELECT {targets} FROM {table_name} {after_from_clause}"
@@ -283,7 +305,7 @@ class PgVectorHandler(VectorStoreHandler, PostgresHandler):
          # See https://docs.pgvecto.rs/use-case/hybrid-search.html#advanced-search-merge-the-results-of-full-text-search-and-vector-search.
          #
          # We can break down the below query as follows:
-         #
+         #
          # Start with a CTE (Common Table Expression) called semantic_search (https://www.postgresql.org/docs/current/queries-with.html).
          # This expression calculates rank by the defined distance function, which measures the distance between the
          # embeddings column and the given embeddings vector. Results are ordered by this rank.
@@ -339,17 +361,28 @@ class PgVectorHandler(VectorStoreHandler, PostgresHandler):
          full_search_query = f'{semantic_search_cte}{full_text_search_cte}{hybrid_select}'
          return self.raw_query(full_search_query)

-     def create_table(self, table_name: str, sparse=False, if_not_exists=True):
-         """
-         Run a create table query on the pgvector database.
-         """
-         table_name = self._check_table(table_name)
-
-         query = f"CREATE TABLE IF NOT EXISTS {table_name} (id text PRIMARY KEY, content text, embeddings vector, metadata jsonb)"
-         if sparse:
-             query = f"CREATE TABLE IF NOT EXISTS {table_name} (id text PRIMARY KEY, content text, embeddings sparsevec, metadata jsonb)"
-
-         self.raw_query(query)
+     def create_table(self, table_name: str):
+         """Create a table with a vector column."""
+         with self.connection.cursor() as cur:
+             # For sparse vectors, use sparsevec type
+             vector_column_type = 'sparsevec' if self._is_sparse else 'vector'
+
+             # Vector size is required for sparse vectors, optional for dense
+             if self._is_sparse and not self._vector_size:
+                 raise ValueError("vector_size is required for sparse vectors")
+
+             # Add vector size specification only if provided
+             size_spec = f"({self._vector_size})" if self._vector_size is not None else "()"
+
+             cur.execute(f"""
+                 CREATE TABLE IF NOT EXISTS {table_name} (
+                     id SERIAL PRIMARY KEY,
+                     embeddings {vector_column_type}{size_spec},
+                     content TEXT,
+                     metadata JSONB
+                 )
+             """)
+             self.connection.commit()

      def insert(
          self, table_name: str, data: pd.DataFrame
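To make the new column-type logic concrete, here is a sketch of the column definitions create_table emits under each configuration; note the "()" fallback when no vector_size is configured:

    for is_sparse, vector_size in ((False, 384), (True, 384), (False, None)):
        vector_column_type = 'sparsevec' if is_sparse else 'vector'
        size_spec = f"({vector_size})" if vector_size is not None else "()"
        print(f"embeddings {vector_column_type}{size_spec}")
    # embeddings vector(384)
    # embeddings sparsevec(384)
    # embeddings vector()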
@@ -447,4 +480,3 @@ class PgVectorHandler(VectorStoreHandler, PostgresHandler):
          """
          table_name = self._check_table(table_name)
          self.raw_query(f"DROP TABLE IF EXISTS {table_name}")
-
mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py

@@ -1,8 +1,10 @@
+ import ast
  from typing import List, Optional

- import pinecone
+ import numpy as np
+ from pinecone import Pinecone, ServerlessSpec
+ from pinecone.core.openapi.shared.exceptions import NotFoundException, PineconeApiException
  import pandas as pd
- import ast

  from mindsdb.integrations.libs.response import RESPONSE_TYPE
  from mindsdb.integrations.libs.response import HandlerResponse
@@ -18,32 +20,30 @@ from mindsdb.utilities import log

  logger = log.getLogger(__name__)

+ DEFAULT_CREATE_TABLE_PARAMS = {
+     "dimension": 8,
+     "metric": "cosine",
+     "spec": {
+         "cloud": "aws",
+         "region": "us-east-1"
+     }
+ }
+ MAX_FETCH_LIMIT = 10000
+ UPSERT_BATCH_SIZE = 99 # API reccomendation
+

  class PineconeHandler(VectorStoreHandler):
      """This handler handles connection and execution of the Pinecone statements."""

      name = "pinecone"

-     def __init__(self, name: str, **kwargs):
+     def __init__(self, name: str, connection_data: dict, **kwargs):
          super().__init__(name)
-         self.MAX_FETCH_LIMIT = 10000
-         self._connection_data = kwargs.get("connection_data")
-         self._client_config = {
-             "api_key": self._connection_data.get("api_key"),
-             "environment": self._connection_data.get("environment")
-         }
-         self._table_create_params = {
-             "dimension": 8,
-             "metric": "cosine",
-             "pods": 1,
-             "replicas": 1,
-             "pod_type": 'p1',
-         }
-         for key in self._table_create_params:
-             if key in self._connection_data:
-                 self._table_create_params[key] = self._connection_data[key]
+         self.connection_data = connection_data
+         self.kwargs = kwargs
+
+         self.connection = None
          self.is_connected = False
-         self.connect()

      def __del__(self):
          if self.is_connected is True:
@@ -51,7 +51,8 @@ class PineconeHandler(VectorStoreHandler):

      def _get_index_handle(self, index_name):
          """Returns handler to index specified by `index_name`"""
-         index = pinecone.Index(index_name)
+         connection = self.connect()
+         index = connection.Index(index_name)
          try:
              index.describe_index_stats()
          except Exception:
@@ -135,10 +136,15 @@ class PineconeHandler(VectorStoreHandler):

      def connect(self):
          """Connect to a pinecone database."""
+         if self.is_connected is True:
+             return self.connection
+
+         if 'api_key' not in self.connection_data:
+             raise ValueError('Required parameter (api_key) must be provided.')
+
          try:
-             pinecone.init(api_key=self._client_config["api_key"], environment=self._client_config["environment"])
-             pinecone.list_indexes()
-             self.is_connected = True
+             self.connection = Pinecone(api_key=self.connection_data['api_key'])
+             return self.connection
          except Exception as e:
              logger.error(f"Error connecting to Pinecone client, {e}!")
              self.is_connected = False
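The connect rewrite tracks the pinecone-client redesign: from v3 onward the module-level pinecone.init/pinecone.Index API is gone in favor of a client object (the ==5.0.1 pin further down matches this). A minimal usage sketch, with "my-index" as a hypothetical index name:

    from pinecone import Pinecone

    pc = Pinecone(api_key="...")    # replaces pinecone.init(api_key=..., environment=...)
    print(pc.list_indexes())        # replaces pinecone.list_indexes()
    index = pc.Index("my-index")    # replaces pinecone.Index("my-index")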
@@ -147,55 +153,99 @@ class PineconeHandler(VectorStoreHandler):
          """Close the pinecone connection."""
          if self.is_connected is False:
              return
-         pinecone.init(api_key="", environment="")
+         self.connection = None
          self.is_connected = False

      def check_connection(self):
          """Check the connection to pinecone."""
-         response_code = StatusResponse(False)
+         response = StatusResponse(False)
+         need_to_close = self.is_connected is False
+
          try:
-             pinecone.list_indexes()
-             response_code.success = True
+             connection = self.connect()
+             connection.list_indexes()
+             response.success = True
          except Exception as e:
              logger.error(f"Error connecting to pinecone , {e}!")
-             response_code.error_message = str(e)
-             return response_code
+             response.error_message = str(e)
+
+         if response.success is True and need_to_close:
+             self.disconnect()
+         if response.success is False and self.is_connected is True:
+             self.is_connected = False
+
+         return response

      def get_tables(self) -> HandlerResponse:
          """Get the list of indexes in the pinecone database."""
-         indexes = pinecone.list_indexes()
-         indexes_names = pd.DataFrame(
-             columns=["index_name"],
-             data=[index for index in indexes],
+         connection = self.connect()
+         indexes = connection.list_indexes()
+         df = pd.DataFrame(
+             columns=["table_name"],
+             data=[index['name'] for index in indexes],
          )
-         return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=indexes_names)
+         return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=df)

      def create_table(self, table_name: str, if_not_exists=True):
          """Create an index with the given name in the Pinecone database."""
-         pinecone.create_index(name=table_name, **self._table_create_params)
+         connection = self.connect()
+
+         # TODO: Should other parameters be supported? Pod indexes?
+         # TODO: Should there be a better way to provide these parameters rather than when establishing the connection?
+         create_table_params = {}
+         for key, val in DEFAULT_CREATE_TABLE_PARAMS.items():
+             if key in self.connection_data:
+                 create_table_params[key] = self.connection_data[key]
+             else:
+                 create_table_params[key] = val
+
+         create_table_params["spec"] = ServerlessSpec(**create_table_params["spec"])
+
+         try:
+             connection.create_index(name=table_name, **create_table_params)
+         except PineconeApiException as pinecone_error:
+             if pinecone_error.status == 409 and if_not_exists:
+                 return
+             raise Exception(f"Error creating index '{table_name}': {pinecone_error}")

-     def insert(self, table_name: str, data: pd.DataFrame, columns: List[str] = None):
+     def insert(self, table_name: str, data: pd.DataFrame):
          """Insert data into pinecone index passed in through `table_name` parameter."""
-         upsert_batch_size = 99 # API reccomendation
          index = self._get_index_handle(table_name)
          if index is None:
              raise Exception(f"Error getting index '{table_name}', are you sure the name is correct?")

          data.rename(columns={
              TableField.ID.value: "id",
-             TableField.EMBEDDINGS.value: "values",
-             TableField.METADATA.value: "metadata"},
+             TableField.EMBEDDINGS.value: "values"},
              inplace=True)
-         data = data[["id", "values", "metadata"]]

-         for chunk in (data[pos:pos + upsert_batch_size] for pos in range(0, len(data), upsert_batch_size)):
+         columns = ["id", "values"]
+
+         if TableField.METADATA.value in data.columns:
+             data.rename(columns={TableField.METADATA.value: "metadata"}, inplace=True)
+             # fill None and NaN values with empty dict
+             if data['metadata'].isnull().any():
+                 data['metadata'] = data['metadata'].apply(lambda x: {} if x is None or (isinstance(x, float) and np.isnan(x)) else x)
+             columns.append("metadata")
+
+         data = data[columns]
+
+         # convert the embeddings to lists if they are strings
+         data["values"] = data["values"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
+
+         for chunk in (data[pos:pos + UPSERT_BATCH_SIZE] for pos in range(0, len(data), UPSERT_BATCH_SIZE)):
              chunk = chunk.to_dict(orient="records")
              index.upsert(vectors=chunk)

      def drop_table(self, table_name: str, if_exists=True):
          """Delete an index passed in through `table_name` from the pinecone ."""
-
-         pinecone.delete_index(table_name)
+         connection = self.connect()
+         try:
+             connection.delete_index(table_name)
+         except NotFoundException:
+             if if_exists:
+                 return
+             raise Exception(f"Error deleting index '{table_name}', are you sure the name is correct?")

      def delete(self, table_name: str, conditions: List[FilterCondition] = None):
          """Delete records in pinecone index `table_name` based on ids or based on metadata conditions."""
@@ -225,6 +275,7 @@ class PineconeHandler(VectorStoreHandler):
          limit: int = None,
      ):
          """Run query on pinecone index named `table_name` and get results."""
+         # TODO: Add support for namespaces.
          index = self._get_index_handle(table_name)
          if index is None:
              raise Exception(f"Error getting index '{table_name}', are you sure the name is correct?")
@@ -233,23 +284,28 @@ class PineconeHandler(VectorStoreHandler):
              "include_values": True,
              "include_metadata": True
          }
+
          # check for metadata filter
          metadata_filters = self._translate_metadata_condition(conditions)
-         # check for vector filter
-         vector_filter = (
-             None
-             if conditions is None
-             else [
-                 condition.value
-                 for condition in conditions
-                 if condition.column == TableField.SEARCH_VECTOR.value
-             ]
-         )
-         if vector_filter:
-             if len(vector_filter) > 1:
+         if metadata_filters is not None:
+             query["filter"] = metadata_filters
+
+         # check for vector and id filters
+         vector_filters = []
+         id_filters = []
+
+         if conditions:
+             for condition in conditions:
+                 if condition.column == TableField.SEARCH_VECTOR.value:
+                     vector_filters.append(condition.value)
+                 elif condition.column == TableField.ID.value:
+                     id_filters.append(condition.value)
+
+         if vector_filters:
+             if len(vector_filters) > 1:
                  raise Exception("You cannot have multiple search_vectors in query")

-             query["vector"] = vector_filter[0]
+             query["vector"] = vector_filters[0]
          # For subqueries, the vector filter is a list of list of strings
          if isinstance(query["vector"], list) and isinstance(query["vector"][0], str):
              if len(query["vector"]) > 1:
@@ -260,26 +316,21 @@ class PineconeHandler(VectorStoreHandler):
          except Exception as e:
              raise Exception(f"Cannot parse the search vector '{query['vector']}'into a list: {e}")

-         # check for limit
-         if limit is not None:
-             query["top_k"] = limit
-         else:
-             query["top_k"] = self.MAX_FETCH_LIMIT
-         if metadata_filters is not None:
-             query["filter"] = metadata_filters
-         # check for id filter
-         id_filters = None
-         if conditions is not None:
-             id_filters = [
-                 condition.value
-                 for condition in conditions
-                 if condition.column == TableField.ID.value
-             ] or None
          if id_filters:
              if len(id_filters) > 1:
                  raise Exception("You cannot have multiple IDs in query")

              query["id"] = id_filters[0]
+
+         if not vector_filters and not id_filters:
+             raise Exception("You must provide either a search_vector or an ID in the query")
+
+         # check for limit
+         if limit is not None:
+             query["top_k"] = limit
+         else:
+             query["top_k"] = MAX_FETCH_LIMIT
+
          # exec query
          try:
              result = index.query(**query)
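After this reordering, query() requires either a search vector or an id, and top_k is set last. Hedged sketches of the two request shapes the method now builds for index.query(**query), with illustrative values:

    # similarity search
    query = {"include_values": True, "include_metadata": True,
             "vector": [0.1, 0.2, 0.3], "top_k": 10}

    # fetch neighbours of an existing record ("doc-1" is a hypothetical id)
    query = {"include_values": True, "include_metadata": True,
             "id": "doc-1", "top_k": 10}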
mindsdb/integrations/handlers/pinecone_handler/requirements.txt

@@ -1 +1 @@
- pinecone-client
+ pinecone-client==5.0.1
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py

@@ -1,5 +1,6 @@
  import time
  import json
+ from typing import Optional

  import pandas as pd
  import psycopg
@@ -161,7 +162,7 @@ class PostgresHandler(DatabaseHandler):
              'float8': 'float64'
          }
          columns = df.columns
-         df = df.set_axis(range(len(columns)), axis=1)
+         df.columns = list(range(len(columns)))
          for column_index, column_name in enumerate(df.columns):
              col = df[column_name]
              if str(col.dtype) == 'object':
@@ -172,7 +173,7 @@ class PostgresHandler(DatabaseHandler):
                      df[column_name] = col.astype(types_map[pg_type.name])
                  except ValueError as e:
                      logger.error(f'Error casting column {col.name} to {types_map[pg_type.name]}: {e}')
-         return df.set_axis(columns, axis=1)
+         df.columns = columns

      @profiler.profile()
      def native_query(self, query: str, params=None) -> Response:
@@ -202,7 +203,7 @@ class PostgresHandler(DatabaseHandler):
                      result,
                      columns=[x.name for x in cur.description]
                  )
-                 df = self._cast_dtypes(df, cur.description)
+                 self._cast_dtypes(df, cur.description)
                  response = Response(
                      RESPONSE_TYPE.TABLE,
                      df
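The three postgres_handler changes above belong together: _cast_dtypes now relabels the DataFrame's columns in place instead of returning re-labeled copies via set_axis, so the native_query call site drops the assignment. The pandas idiom, in brief:

    import pandas as pd

    df = pd.DataFrame([[1, 2]], columns=["a", "b"])
    columns = df.columns
    df.columns = list(range(len(columns)))   # relabel in place, no copy
    df.columns = columns                     # restore the original labels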
@@ -281,21 +282,27 @@ class PostgresHandler(DatabaseHandler):
          """
          return self.native_query(query)

-     def get_columns(self, table_name: str) -> Response:
+     def get_columns(self, table_name: str, schema_name: Optional[str] = None) -> Response:
          """
          Retrieves column details for a specified table in the PostgreSQL database.

          Args:
              table_name (str): The name of the table for which to retrieve column information.
+             schema_name (str): The name of the schema in which the table is located.

          Returns:
              Response: A response object containing the column details, formatted as per the `Response` class.
+
          Raises:
              ValueError: If the 'table_name' is not a valid string.
          """

          if not table_name or not isinstance(table_name, str):
              raise ValueError("Invalid table name provided.")
+         if isinstance(schema_name, str):
+             schema_name = f"'{schema_name}'"
+         else:
+             schema_name = 'current_schema()'
          query = f"""
              SELECT
                  column_name as "Field",
@@ -305,12 +312,11 @@ class PostgresHandler(DatabaseHandler):
              WHERE
                  table_name = '{table_name}'
              AND
-                 table_schema = current_schema()
+                 table_schema = {schema_name}
          """
          return self.native_query(query)

      def subscribe(self, stop_event, callback, table_name, columns=None, **kwargs):
-
          config = self._make_connection_args()
          config['autocommit'] = True

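The net effect on the generated information_schema query, sketched for a hypothetical schema name:

    for schema_name in ("analytics", None):
        schema_sql = f"'{schema_name}'" if isinstance(schema_name, str) else 'current_schema()'
        print(f"table_schema = {schema_sql}")
    # table_schema = 'analytics'
    # table_schema = current_schema()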
mindsdb/integrations/handlers/slack_handler/slack_handler.py

@@ -231,6 +231,9 @@ class SlackHandler(APIChatHandler):
              'polling': {
                  'type': 'realtime',
              },
+             'memory': {
+                 'type': 'handler',
+             },
              'tables': [
                  {
                      'chat_table': {
@@ -238,7 +241,7 @@ class SlackHandler(APIChatHandler):
                          'chat_id_col': 'channel_id',
                          'username_col': 'user',
                          'text_col': 'text',
-                         'time_col': 'thread_ts',
+                         'time_col': 'created_at',
                      }
                  },
                  {
@@ -264,7 +267,7 @@ class SlackHandler(APIChatHandler):
          user_info = web_connection.auth_test().data
          return user_info['bot_id']

-     def subscribe(self, stop_event: threading.Event, callback: Callable, **kwargs: Any) -> None:
+     def subscribe(self, stop_event: threading.Event, callback: Callable, table_name: Text, columns: List = None, **kwargs: Any) -> None:
          """
          Subscribes to the Slack API using the Socket Mode for real-time responses to messages.

@@ -274,6 +277,14 @@ class SlackHandler(APIChatHandler):
              table_name (Text): The name of the table to subscribe to.
              kwargs: Arbitrary keyword arguments.
          """
+         if table_name not in ['messages', 'threads']:
+             raise RuntimeError(f'Table {table_name} is not supported for subscription.')
+
+         # Raise an error if columns are provided.
+         # Since Slack subscriptions depend on events and not changes to the virtual tables, columns are not supported.
+         if columns:
+             raise RuntimeError('Columns are not supported for Slack subscriptions.')
+
          self._socket_connection = SocketModeClient(
              # This app-level token will be used only for establishing a connection.
              app_token=self.connection_data['app_token'], # xapp-A111-222-xyz
mindsdb/integrations/handlers/slack_handler/slack_tables.py

@@ -6,7 +6,7 @@ import pandas as pd
  from slack_sdk.errors import SlackApiError

  from mindsdb.integrations.libs.api_handler import APIResource
- from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, FilterCondition, FilterOperator
+ from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, FilterCondition, FilterOperator, SortColumn
  from mindsdb.utilities import log

  logger = log.getLogger(__name__)
@@ -203,6 +203,7 @@ class SlackMessagesTable(APIResource):
          self,
          conditions: List[FilterCondition] = None,
          limit: int = None,
+         sort: List[SortColumn] = None,
          **kwargs: Any
      ) -> pd.DataFrame:
          """
@@ -222,6 +223,7 @@ class SlackMessagesTable(APIResource):
          Args:
              conditions (List[FilterCondition]): The conditions to filter the messages.
              limit (int): The limit of the messages to return.
+             sort (List[SortColumn]): The columns to sort the messages by.
              kwargs (Any): Arbitrary keyword arguments.

          Raises:
@@ -306,6 +308,14 @@ class SlackMessagesTable(APIResource):
          # Translate the time stamp into a 'created_at' field.
          result['created_at'] = pd.to_datetime(result['ts'].astype(float), unit='s').dt.strftime('%Y-%m-%d %H:%M:%S')

+         # Sort the messages by the specified columns.
+         if sort:
+             result.sort_values(
+                 by=[col.column for col in sort],
+                 ascending=[col.ascending for col in sort],
+                 inplace=True
+             )
+
          return result

      def insert(self, query: Insert):
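A small sketch of the sort handling added here and mirrored in SlackThreadsTable below (SortColumnStub stands in for mindsdb's SortColumn, which exposes column and ascending as used above):

    from dataclasses import dataclass
    import pandas as pd

    @dataclass
    class SortColumnStub:
        column: str
        ascending: bool = True

    result = pd.DataFrame({"created_at": ["2025-01-01", "2025-01-02"]})
    sort = [SortColumnStub("created_at", ascending=False)]
    result.sort_values(by=[c.column for c in sort],
                       ascending=[c.ascending for c in sort],
                       inplace=True)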
@@ -496,6 +506,7 @@ class SlackThreadsTable(APIResource):
          self,
          conditions: List[FilterCondition] = None,
          limit: int = None,
+         sort: List[SortColumn] = None,
          **kwargs: Any
      ) -> pd.DataFrame:
          """
@@ -514,6 +525,7 @@ class SlackThreadsTable(APIResource):
          Args:
              conditions (List[FilterCondition]): The conditions to filter the messages.
              limit (int): The limit of the messages to return.
+             sort (List[SortColumn]): The columns to sort the messages by.
              kwargs (Any): Arbitrary keyword arguments.

          Raises:
@@ -591,6 +603,14 @@ class SlackThreadsTable(APIResource):
          result['channel_id'] = params['channel']
          result['channel_name'] = channel['name'] if 'name' in channel else None

+         # Sort the messages by the specified columns.
+         if sort:
+             result.sort_values(
+                 by=[col.column for col in sort],
+                 ascending=[col.ascending for col in sort],
+                 inplace=True
+             )
+
          return result

      def insert(self, query: Insert):
@@ -8,7 +8,7 @@ from sqlalchemy.orm.attributes import flag_modified
  from mindsdb_sql_parser import parse_sql
  from mindsdb_sql_parser.ast import Identifier, Select, Star, NativeQuery

- from mindsdb.api.executor import SQLQuery
+ from mindsdb.api.executor.sql_query import SQLQuery
  import mindsdb.utilities.profiler as profiler
  from mindsdb.utilities.functions import mark_process
  from mindsdb.utilities.config import Config