MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (77):
  1. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
  2. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +5 -3
  5. mindsdb/api/executor/__init__.py +0 -1
  6. mindsdb/api/executor/command_executor.py +2 -1
  7. mindsdb/api/executor/data_types/answer.py +1 -1
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
  9. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
  10. mindsdb/api/executor/sql_query/__init__.py +1 -0
  11. mindsdb/api/executor/sql_query/result_set.py +36 -21
  12. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
  13. mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
  14. mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
  15. mindsdb/api/executor/utilities/sql.py +2 -10
  16. mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
  17. mindsdb/api/http/namespaces/sql.py +3 -1
  18. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
  19. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
  20. mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
  21. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
  22. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
  24. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
  25. mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
  26. mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
  27. mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
  28. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
  29. mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
  30. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
  31. mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
  32. mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
  33. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
  34. mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
  35. mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
  36. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
  37. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
  38. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
  39. mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
  40. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
  41. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
  42. mindsdb/integrations/utilities/rag/settings.py +8 -2
  43. mindsdb/integrations/utilities/sql_utils.py +1 -1
  44. mindsdb/interfaces/agents/agents_controller.py +3 -5
  45. mindsdb/interfaces/agents/langchain_agent.py +112 -150
  46. mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
  47. mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
  48. mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
  49. mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
  50. mindsdb/interfaces/chatbot/memory.py +58 -13
  51. mindsdb/interfaces/database/projects.py +17 -15
  52. mindsdb/interfaces/database/views.py +12 -25
  53. mindsdb/interfaces/knowledge_base/controller.py +39 -15
  54. mindsdb/interfaces/model/functions.py +15 -4
  55. mindsdb/interfaces/model/model_controller.py +4 -7
  56. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
  57. mindsdb/interfaces/skills/retrieval_tool.py +10 -3
  58. mindsdb/interfaces/skills/skill_tool.py +97 -53
  59. mindsdb/interfaces/skills/sql_agent.py +77 -36
  60. mindsdb/interfaces/storage/db.py +1 -1
  61. mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
  62. mindsdb/utilities/cache.py +7 -4
  63. mindsdb/utilities/context.py +11 -1
  64. mindsdb/utilities/langfuse.py +264 -0
  65. mindsdb/utilities/log.py +20 -2
  66. mindsdb/utilities/otel/__init__.py +206 -0
  67. mindsdb/utilities/otel/logger.py +25 -0
  68. mindsdb/utilities/otel/meter.py +19 -0
  69. mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
  70. mindsdb/utilities/otel/tracer.py +16 -0
  71. mindsdb/utilities/partitioning.py +52 -0
  72. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  73. mindsdb/utilities/utils.py +34 -0
  74. mindsdb/utilities/otel.py +0 -72
  75. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
  76. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
  77. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,18 @@
1
1
  import enum
2
- from collections import defaultdict
3
- from typing import List, Optional
2
+ import inspect
4
3
  from dataclasses import dataclass
4
+ from collections import defaultdict
5
+ from typing import List, Dict, Optional
5
6
 
6
7
  from langchain_core.embeddings import Embeddings
7
8
  from langchain_core.language_models import BaseChatModel
8
9
  from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant, Star
9
10
 
10
- from mindsdb.integrations.libs.vectordatabase_handler import TableField
11
- from mindsdb.interfaces.skills.sql_agent import SQLAgent
12
- from mindsdb.interfaces.storage import db
13
11
  from mindsdb.utilities import log
14
12
  from mindsdb.utilities.cache import get_cache
13
+ from mindsdb.interfaces.storage import db
14
+ from mindsdb.interfaces.skills.sql_agent import SQLAgent
15
+ from mindsdb.integrations.libs.vectordatabase_handler import TableField
15
16
 
16
17
 
17
18
  _DEFAULT_TOP_K_SIMILARITY_SEARCH = 5
@@ -45,27 +46,54 @@ class SkillData:
45
46
  agent_tables_list: Optional[List[str]]
46
47
 
47
48
  @property
48
- def tables_list(self) -> List[str]:
49
- """List of tables which may use this skill. If the list is empty, there are no restrictions.
50
- The result list is a combination of skill's and agent's tables lists.
49
+ def restriction_on_tables(self) -> Optional[Dict[str, set]]:
50
+ """Schemas and tables which agent+skill may use. The result is intersections of skill's and agent's tables lists.
51
51
 
52
52
  Returns:
53
- List[str]: List of tables.
53
+ Optional[Dict[str, set]]: allowed schemas and tables. Schemas - are keys in dict, tables - are values.
54
+ if result is None, then there are no restrictions
54
55
 
55
56
  Raises:
56
57
  ValueError: if there is no intersection between skill's and agent's list.
57
58
  This means that all tables restricted for use.
58
59
  """
59
- agent_tables_list = self.agent_tables_list or []
60
- skill_tables_list = self.params.get('tables', [])
61
- if len(skill_tables_list) > 0 and len(agent_tables_list) > 0:
62
- diff = set(skill_tables_list) & set(agent_tables_list)
63
- if len(diff) == 0:
64
- raise ValueError("There are no tables allowed for use.")
65
- return list(diff)
66
- if len(skill_tables_list) > 0:
67
- return skill_tables_list
68
- return agent_tables_list
60
+ def list_to_map(input: List) -> Dict:
61
+ agent_tables_map = defaultdict(set)
62
+ for x in input:
63
+ if isinstance(x, str):
64
+ table_name = x
65
+ schema_name = None
66
+ elif isinstance(x, dict):
67
+ table_name = x['table']
68
+ schema_name = x.get('schema')
69
+ else:
70
+ raise ValueError(f'Unexpected value in tables list: {x}')
71
+ agent_tables_map[schema_name].add(table_name)
72
+ return agent_tables_map
73
+
74
+ agent_tables_map = list_to_map(self.agent_tables_list or [])
75
+ skill_tables_map = list_to_map(self.params.get('tables', []))
76
+
77
+ if len(agent_tables_map) > 0 and len(skill_tables_map) > 0:
78
+ if len(set(agent_tables_map) & set(skill_tables_map)) == 0:
79
+ raise ValueError("Skill's and agent's allowed tables list have no shared schemas.")
80
+
81
+ intersection_tables_map = defaultdict(set)
82
+ has_intersection = False
83
+ for schema_name in agent_tables_map:
84
+ if schema_name not in skill_tables_map:
85
+ continue
86
+ intersection_tables_map[schema_name] = agent_tables_map[schema_name] & skill_tables_map[schema_name]
87
+ if len(intersection_tables_map[schema_name]) > 0:
88
+ has_intersection = True
89
+ if has_intersection is False:
90
+ raise ValueError("Skill's and agent's allowed tables list have no shared tables.")
91
+ return intersection_tables_map
92
+ if len(skill_tables_map) > 0:
93
+ return skill_tables_map
94
+ if len(agent_tables_map) > 0:
95
+ return agent_tables_map
96
+ return None
69
97
 
70
98
 
71
99
  class SkillToolController:
@@ -83,22 +111,6 @@ class SkillToolController:
83
111
  self.command_executor = ExecuteCommands(sql_session)
84
112
  return self.command_executor
85
113
 
86
- def get_sql_agent(
87
- self,
88
- database: str,
89
- include_tables: Optional[List[str]] = None,
90
- ignore_tables: Optional[List[str]] = None,
91
- sample_rows_in_table_info: int = 3,
92
- ):
93
- return SQLAgent(
94
- self.get_command_executor(),
95
- database,
96
- include_tables,
97
- ignore_tables,
98
- sample_rows_in_table_info,
99
- cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
100
- )
101
-
102
114
  def _make_text_to_sql_tools(self, skills: List[db.Skills], llm) -> List:
103
115
  '''
104
116
  Uses SQLAgent to execute tool
@@ -112,19 +124,47 @@ class SkillToolController:
112
124
  raise ImportError(
113
125
  'To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`')
114
126
 
127
+ command_executor = self.get_command_executor()
128
+
115
129
  tables_list = []
116
130
  for skill in skills:
117
131
  database = skill.params['database']
118
- for table in skill.tables_list:
119
- tables_list.append(f'{database}.{table}')
120
-
121
- # use list databases
122
- database = ','.join(set(s.params['database'] for s in skills))
123
- db = MindsDBSQL(
124
- engine=self.get_command_executor(),
125
- database=database,
126
- metadata=self.get_command_executor().session.integration_controller,
127
- include_tables=tables_list
132
+ restriction_on_tables = skill.restriction_on_tables
133
+ if restriction_on_tables is None:
134
+ handler = command_executor.session.integration_controller.get_data_handler(database)
135
+ if 'all' in inspect.signature(handler.get_tables).parameters:
136
+ response = handler.get_tables(all=True)
137
+ else:
138
+ response = handler.get_tables()
139
+ # no restrictions
140
+ if 'table_schema' in response.data_frame.columns:
141
+ for _, row in response.data_frame.iterrows():
142
+ tables_list.append(f"{database}.{row['table_schema']}.{row['table_name']}")
143
+ else:
144
+ for _, row in response.data_frame.iterrows():
145
+ tables_list.append(f"{database}.{row['table_name']}")
146
+ continue
147
+ for schema_name, tables in restriction_on_tables.items():
148
+ for table in tables:
149
+ if schema_name is None:
150
+ tables_list.append(f'{database}.{table}')
151
+ else:
152
+ tables_list.append(f'{database}.{schema_name}.{table}')
153
+
154
+ sql_agent = SQLAgent(
155
+ command_executor=command_executor,
156
+ databases=list(set(s.params['database'] for s in skills)),
157
+ databases_struct={
158
+ skill.params['database']: skill.restriction_on_tables
159
+ for skill in skills
160
+ },
161
+ include_tables=tables_list,
162
+ ignore_tables=None,
163
+ sample_rows_in_table_info=3,
164
+ cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
165
+ )
166
+ db = MindsDBSQL.custom_init(
167
+ sql_agent=sql_agent
128
168
  )
129
169
 
130
170
  # Users probably don't need to configure this for now.
@@ -138,14 +178,18 @@ class SkillToolController:
138
178
  for i, tool in enumerate(sql_database_tools):
139
179
  if isinstance(tool, QuerySQLDataBaseTool):
140
180
  # Add our own custom description so our agent knows when to query this table.
141
- tool.description = (
142
- f'Use this tool if you need data about {" OR ".join(descriptions)}. '
143
- 'Use the conversation context to decide which table to query. '
144
- f'These are the available tables: {",".join(tables_list)}.\n' if len(tables_list) > 0 else '\n'
145
- f'ALWAYS consider these special cases:\n'
146
- f'- For TIMESTAMP type columns, make sure you include the time portion in your query (e.g. WHERE date_column = "2020-01-01 12:00:00")'
147
- f'Here are the rest of the instructions:\n'
148
- f'{tool.description}'
181
+ original_description = tool.description
182
+ tool.description = ''
183
+ if len(descriptions) > 0:
184
+ tool.description += f'Use this tool if you need data about {" OR ".join(descriptions)}.\n'
185
+ tool.description += 'Use the conversation context to decide which table to query.\n'
186
+ if len(tables_list) > 0:
187
+ f'These are the available tables: {",".join(tables_list)}.\n'
188
+ tool.description += (
189
+ 'ALWAYS consider these special cases:\n'
190
+ ' - For TIMESTAMP type columns, make sure you include the time portion in your query (e.g. WHERE date_column = "2020-01-01 12:00:00")\n'
191
+ 'Here are the rest of the instructions:\n'
192
+ f'{original_description}'
149
193
  )
150
194
  sql_database_tools[i] = tool
151
195
  return sql_database_tools
@@ -1,37 +1,38 @@
1
- from typing import Iterable, List, Optional
2
1
 
3
2
  import re
4
- from mindsdb_sql_parser.ast import Select, Show, Describe, Explain
3
+ import inspect
4
+ from typing import Iterable, List, Optional
5
5
 
6
6
  import pandas as pd
7
7
  from mindsdb_sql_parser import parse_sql
8
- from mindsdb_sql_parser.ast import Identifier
9
- from mindsdb.integrations.utilities.query_traversal import query_traversal
8
+ from mindsdb_sql_parser.ast import Select, Show, Describe, Explain, Identifier
10
9
 
11
10
  from mindsdb.utilities import log
12
11
  from mindsdb.utilities.context import context as ctx
12
+ from mindsdb.integrations.utilities.query_traversal import query_traversal
13
13
 
14
14
  logger = log.getLogger(__name__)
15
15
 
16
16
 
17
17
  class SQLAgent:
18
-
19
18
  def __init__(
20
19
  self,
21
20
  command_executor,
22
- database: str,
21
+ databases: List[str],
22
+ databases_struct: dict,
23
23
  include_tables: Optional[List[str]] = None,
24
24
  ignore_tables: Optional[List[str]] = None,
25
25
  sample_rows_in_table_info: int = 3,
26
26
  cache: Optional[dict] = None
27
27
  ):
28
28
  self._command_executor = command_executor
29
+ self._mindsdb_db_struct = databases_struct
29
30
 
30
31
  self._sample_rows_in_table_info = int(sample_rows_in_table_info)
31
32
 
32
33
  self._tables_to_include = include_tables
33
34
  self._tables_to_ignore = []
34
- self._databases = database.split(',')
35
+ self._databases = databases
35
36
  if not self._tables_to_include:
36
37
  # ignore_tables and include_tables should not be used together.
37
38
  # include_tables takes priority if it's set.
@@ -40,7 +41,6 @@ class SQLAgent:
40
41
 
41
42
  def _call_engine(self, query: str, database=None):
42
43
  # switch database
43
-
44
44
  ast_query = parse_sql(query.strip('`'))
45
45
  self._check_permissions(ast_query)
46
46
 
@@ -55,7 +55,6 @@ class SQLAgent:
55
55
  return ret
56
56
 
57
57
  def _check_permissions(self, ast_query):
58
-
59
58
  # check type of query
60
59
  if not isinstance(ast_query, (Select, Show, Describe, Explain)):
61
60
  raise ValueError(f"Query is not allowed: {ast_query.to_string()}")
@@ -66,14 +65,21 @@ class SQLAgent:
66
65
  if is_table and isinstance(node, Identifier):
67
66
  name1 = node.to_string()
68
67
  name2 = '.'.join(node.parts)
69
- name3 = node.parts[-1]
68
+ if len(node.parts) == 3:
69
+ name3 = '.'.join(node.parts[1:])
70
+ else:
71
+ name3 = node.parts[-1]
70
72
  if not {name1, name2, name3}.intersection(self._tables_to_include):
71
73
  raise ValueError(f"Table {name1} not found. Available tables: {', '.join(self._tables_to_include)}")
72
74
 
73
75
  query_traversal(ast_query, _check_f)
74
76
 
75
77
  def get_usable_table_names(self) -> Iterable[str]:
78
+ """Get a list of tables that the agent has access to.
76
79
 
80
+ Returns:
81
+ Iterable[str]: list with table names
82
+ """
77
83
  cache_key = f'{ctx.company_id}_{",".join(self._databases)}_tables'
78
84
 
79
85
  # first check cache and return if found
@@ -85,25 +91,52 @@ class SQLAgent:
85
91
  if self._tables_to_include:
86
92
  return self._tables_to_include
87
93
 
88
- ret = self._call_engine('show databases;')
89
- dbs = [lst[0] for lst in ret.data.to_lists() if lst[0] != 'information_schema']
90
- usable_tables = []
91
- for db in dbs:
92
- if db != 'mindsdb' and db in self._databases:
93
- try:
94
- ret = self._call_engine('show tables', database=db)
95
- tables = [lst[0] for lst in ret.data.to_lists() if lst[0] != 'information_schema']
96
- for table in tables:
97
- # By default, include all tables in a database unless expilcitly ignored.
98
- table_name = f'{db}.{table}'
99
- if table_name not in self._tables_to_ignore:
100
- usable_tables.append(table_name)
101
- except Exception as e:
102
- logger.warning('Unable to get tables for %s: %s', db, str(e))
94
+ result_tables = []
95
+
96
+ for db_name in self._mindsdb_db_struct:
97
+ handler = self._command_executor.session.integration_controller.get_data_handler(db_name)
98
+
99
+ schemas_names = list(self._mindsdb_db_struct[db_name].keys())
100
+ if len(schemas_names) > 1 and None in schemas_names:
101
+ raise Exception('default schema and named schemas can not be used in same filter')
102
+
103
+ if None in schemas_names:
104
+ # get tables only from default schema
105
+ response = handler.get_tables()
106
+ tables_in_default_schema = list(response.data_frame.table_name)
107
+ schema_tables_restrictions = self._mindsdb_db_struct[db_name][None] # None - is default schema
108
+ if schema_tables_restrictions is None:
109
+ for table_name in tables_in_default_schema:
110
+ result_tables.append([db_name, table_name])
111
+ else:
112
+ for table_name in schema_tables_restrictions:
113
+ if table_name in tables_in_default_schema:
114
+ result_tables.append([db_name, table_name])
115
+ else:
116
+ if 'all' in inspect.signature(handler.get_tables).parameters:
117
+ response = handler.get_tables(all=True)
118
+ else:
119
+ response = handler.get_tables()
120
+ response_schema_names = list(response.data_frame.table_schema.unique())
121
+ schemas_intersection = set(schemas_names) & set(response_schema_names)
122
+ if len(schemas_intersection) == 0:
123
+ raise Exception('There are no allowed schemas in ds')
124
+
125
+ for schema_name in schemas_intersection:
126
+ schema_sub_df = response.data_frame[response.data_frame['table_schema'] == schema_name]
127
+ if self._mindsdb_db_struct[db_name][schema_name] is None:
128
+ # all tables from schema allowed
129
+ for row in schema_sub_df:
130
+ result_tables.append([db_name, schema_name, row['table_name']])
131
+ else:
132
+ for table_name in self._mindsdb_db_struct[db_name][schema_name]:
133
+ if table_name in schema_sub_df['table_name'].values:
134
+ result_tables.append([db_name, schema_name, table_name])
135
+
136
+ result_tables = ['.'.join(x) for x in result_tables]
103
137
  if self._cache:
104
- self._cache.set(cache_key, set(usable_tables))
105
-
106
- return usable_tables
138
+ self._cache.set(cache_key, set(result_tables))
139
+ return result_tables
107
140
 
108
141
  def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
109
142
  """
@@ -115,7 +148,10 @@ class SQLAgent:
115
148
  tables_idx = {}
116
149
  for table in all_tables:
117
150
  # by name
118
- tables_idx[(table.parts[-1],)] = table
151
+ if len(table.parts) == 3:
152
+ tables_idx[tuple(table.parts[1:])] = table
153
+ else:
154
+ tables_idx[(table.parts[-1],)] = table
119
155
  # by path
120
156
  tables_idx[tuple(table.parts)] = table
121
157
 
@@ -165,26 +201,31 @@ class SQLAgent:
165
201
  def _get_single_table_info(self, table: Identifier) -> str:
166
202
  if len(table.parts) < 2:
167
203
  raise ValueError(f"Database is required for table: {table}")
168
- integration, table_name = table.parts[-2:]
204
+ if len(table.parts) == 3:
205
+ integration, schema_name, table_name = table.parts[-3:]
206
+ else:
207
+ schema_name = None
208
+ integration, table_name = table.parts[-2:]
209
+
169
210
  table_str = str(table)
170
211
 
171
212
  dn = self._command_executor.session.datahub.get(integration)
172
213
 
173
214
  fields, dtypes = [], []
174
- for column in dn.get_table_columns(table_name):
215
+ for column in dn.get_table_columns(table_name, schema_name):
175
216
  fields.append(column['name'])
176
217
  dtypes.append(column.get('type', ''))
177
218
 
178
- info = f'Table named `{table_name}`\n'
179
- info += f"\n/* Sample with first {self._sample_rows_in_table_info} rows from table {table_str}:\n"
219
+ info = f'Table named `{table_str}`:\n'
220
+ info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str}:\n"
180
221
  info += "\t".join([field for field in fields])
181
- info += self._get_sample_rows(table_str, fields) + "\n*/"
222
+ info += self._get_sample_rows(table_str, fields) + "\n"
182
223
  info += '\nColumn data types: ' + ",\t".join(
183
- [f'`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
224
+ [f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
184
225
  return info
185
226
 
186
227
  def _get_sample_rows(self, table: str, fields: List[str]) -> str:
187
- command = f"select {','.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
228
+ command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
188
229
  try:
189
230
  ret = self._call_engine(command)
190
231
  sample_rows = ret.data.to_lists()
@@ -212,7 +212,7 @@ class Project(Base):
212
212
  )
213
213
  deleted_at = Column(DateTime)
214
214
  name = Column(String, nullable=False)
215
- company_id = Column(Integer)
215
+ company_id = Column(Integer, default=0)
216
216
  __table_args__ = (
217
217
  UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
218
218
  )
@@ -0,0 +1,88 @@
1
+ """project-company
2
+
3
+ Revision ID: c06c35f7e8e1
4
+ Revises: f6dc924079fa
5
+ Create Date: 2025-01-15 14:14:29.295834
6
+
7
+ """
8
+ from collections import defaultdict
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ import mindsdb.interfaces.storage.db # noqa
13
+ from mindsdb.utilities import log
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision = 'c06c35f7e8e1'
17
+ down_revision = 'f6dc924079fa'
18
+ branch_labels = None
19
+ depends_on = None
20
+
21
+
22
+ logger = log.getLogger(__name__)
23
+
24
+
25
+ def upgrade():
26
+
27
+ """
28
+ convert company_id from null to 0 to make constrain works
29
+ duplicated names are renamed
30
+ """
31
+
32
+ conn = op.get_bind()
33
+ table = sa.Table(
34
+ 'project',
35
+ sa.MetaData(),
36
+ sa.Column('id', sa.Integer()),
37
+ sa.Column('name', sa.String()),
38
+ sa.Column('company_id', sa.Integer()),
39
+ )
40
+
41
+ data = conn.execute(
42
+ table
43
+ .select()
44
+ .where(table.c.company_id == sa.null())
45
+ ).fetchall()
46
+
47
+ names = defaultdict(list)
48
+ for id, name, _ in data:
49
+ names[name].append(id)
50
+
51
+ # get duplicated
52
+ for name, ids in names.items():
53
+ if len(ids) == 1:
54
+ continue
55
+
56
+ # rename all except first
57
+ for id in ids[1:]:
58
+ new_name = f'{name}__{id}'
59
+
60
+ op.execute(
61
+ table
62
+ .update()
63
+ .where(table.c.id == id)
64
+ .values({'name': new_name})
65
+ )
66
+ logger.warning(f'Found duplicated project name: {name}, renamed to: {new_name}')
67
+
68
+ op.execute(
69
+ table
70
+ .update()
71
+ .where(table.c.company_id == sa.null())
72
+ .values({'company_id': 0})
73
+ )
74
+
75
+
76
+ def downgrade():
77
+ table = sa.Table(
78
+ 'project',
79
+ sa.MetaData(),
80
+ sa.Column('company_id', sa.Integer())
81
+ )
82
+
83
+ op.execute(
84
+ table
85
+ .update()
86
+ .where(table.c.company_id == 0)
87
+ .values({'company_id': sa.null()})
88
+ )
@@ -71,10 +71,13 @@ _CACHE_MAX_SIZE = 500
71
71
 
72
72
 
73
73
  def dataframe_checksum(df: pd.DataFrame):
74
-
75
- return str_checksum(str(
76
- df.set_axis(range(len(df.columns)), axis=1).to_records(index=False)
77
- ))
74
+ original_columns = df.columns
75
+ df.columns = list(range(len(df.columns)))
76
+ result = hashlib.sha256(
77
+ str(df.values).encode()
78
+ ).hexdigest()
79
+ df.columns = original_columns
80
+ return result
78
81
 
79
82
 
80
83
  def json_checksum(obj: t.Union[dict, list]):
@@ -24,7 +24,8 @@ class Context:
24
24
  'enabled': False,
25
25
  'pointer': None,
26
26
  'tree': None
27
- }
27
+ },
28
+ 'email_confirmed': 0,
28
29
  })
29
30
 
30
31
  def __getattr__(self, name: str) -> Any:
@@ -52,6 +53,15 @@ class Context:
52
53
  def load(self, storage: dict) -> None:
53
54
  self._storage.set(storage)
54
55
 
56
+ def metadata(self, **kwargs) -> dict:
57
+ return {
58
+ 'user_id': self.user_id or "",
59
+ 'company_id': self.company_id or "",
60
+ 'session_id': self.session_id,
61
+ 'user_class': self.user_class,
62
+ **kwargs
63
+ }
64
+
55
65
 
56
66
  _context_var = ContextVar('mindsdb.context')
57
67
  context = Context(_context_var)