MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB might be problematic.
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +254 -253
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +55 -52
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +9 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +1 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +30 -12
- mindsdb/integrations/utilities/rag/settings.py +6 -2
- mindsdb/interfaces/agents/agents_controller.py +3 -5
- mindsdb/interfaces/agents/langchain_agent.py +112 -150
- mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/projects.py +17 -15
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +6 -1
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
- mindsdb/interfaces/skills/skill_tool.py +97 -53
- mindsdb/interfaces/skills/sql_agent.py +77 -36
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/context.py +2 -1
- mindsdb/utilities/langfuse.py +264 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/skills/skill_tool.py
CHANGED

@@ -1,17 +1,18 @@
 import enum
-
-from typing import List, Optional
+import inspect
 from dataclasses import dataclass
+from collections import defaultdict
+from typing import List, Dict, Optional

 from langchain_core.embeddings import Embeddings
 from langchain_core.language_models import BaseChatModel
 from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant, Star

-from mindsdb.integrations.libs.vectordatabase_handler import TableField
-from mindsdb.interfaces.skills.sql_agent import SQLAgent
-from mindsdb.interfaces.storage import db
 from mindsdb.utilities import log
 from mindsdb.utilities.cache import get_cache
+from mindsdb.interfaces.storage import db
+from mindsdb.interfaces.skills.sql_agent import SQLAgent
+from mindsdb.integrations.libs.vectordatabase_handler import TableField


 _DEFAULT_TOP_K_SIMILARITY_SEARCH = 5
@@ -45,27 +46,54 @@ class SkillData:
     agent_tables_list: Optional[List[str]]

     @property
-    def
-        """
-        The result list is a combination of skill's and agent's tables lists.
+    def restriction_on_tables(self) -> Optional[Dict[str, set]]:
+        """Schemas and tables which agent+skill may use. The result is intersections of skill's and agent's tables lists.

         Returns:
-
+            Optional[Dict[str, set]]: allowed schemas and tables. Schemas - are keys in dict, tables - are values.
+            if result is None, then there are no restrictions

         Raises:
             ValueError: if there is no intersection between skill's and agent's list.
                 This means that all tables restricted for use.
         """
-
-
-
-
-
-
-
-
-
-
+        def list_to_map(input: List) -> Dict:
+            agent_tables_map = defaultdict(set)
+            for x in input:
+                if isinstance(x, str):
+                    table_name = x
+                    schema_name = None
+                elif isinstance(x, dict):
+                    table_name = x['table']
+                    schema_name = x.get('schema')
+                else:
+                    raise ValueError(f'Unexpected value in tables list: {x}')
+                agent_tables_map[schema_name].add(table_name)
+            return agent_tables_map
+
+        agent_tables_map = list_to_map(self.agent_tables_list or [])
+        skill_tables_map = list_to_map(self.params.get('tables', []))
+
+        if len(agent_tables_map) > 0 and len(skill_tables_map) > 0:
+            if len(set(agent_tables_map) & set(skill_tables_map)) == 0:
+                raise ValueError("Skill's and agent's allowed tables list have no shared schemas.")
+
+            intersection_tables_map = defaultdict(set)
+            has_intersection = False
+            for schema_name in agent_tables_map:
+                if schema_name not in skill_tables_map:
+                    continue
+                intersection_tables_map[schema_name] = agent_tables_map[schema_name] & skill_tables_map[schema_name]
+                if len(intersection_tables_map[schema_name]) > 0:
+                    has_intersection = True
+            if has_intersection is False:
+                raise ValueError("Skill's and agent's allowed tables list have no shared tables.")
+            return intersection_tables_map
+        if len(skill_tables_map) > 0:
+            return skill_tables_map
+        if len(agent_tables_map) > 0:
+            return agent_tables_map
+        return None


 class SkillToolController:
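For orientation, a minimal standalone sketch of the intersection rule the new restriction_on_tables property applies; this is not the MindsDB class itself, and the example table lists are invented. Bare table names fall under the default schema (None); when both the agent and the skill restrict tables, only the per-schema intersection survives, otherwise whichever side is non-empty wins.

    from collections import defaultdict
    from typing import Dict, List, Optional


    def intersect_tables(agent_tables: List, skill_tables: List) -> Optional[Dict]:
        def list_to_map(items: List) -> Dict:
            tables_map = defaultdict(set)
            for x in items:
                if isinstance(x, str):
                    tables_map[None].add(x)                  # bare name -> default schema
                else:
                    tables_map[x.get('schema')].add(x['table'])
            return tables_map

        agent_map, skill_map = list_to_map(agent_tables), list_to_map(skill_tables)
        if agent_map and skill_map:
            result = {s: agent_map[s] & skill_map[s] for s in agent_map if s in skill_map}
            if not any(result.values()):
                raise ValueError('no shared tables between skill and agent')
            return result
        return skill_map or agent_map or None                # None -> no restriction at all


    # Hypothetical lists: the agent allows two tables, the skill allows one of them.
    print(intersect_tables(
        ['orders', {'schema': 'sales', 'table': 'invoices'}],
        ['orders', 'customers'],
    ))  # -> {None: {'orders'}}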
@@ -83,22 +111,6 @@ class SkillToolController:
         self.command_executor = ExecuteCommands(sql_session)
         return self.command_executor

-    def get_sql_agent(
-            self,
-            database: str,
-            include_tables: Optional[List[str]] = None,
-            ignore_tables: Optional[List[str]] = None,
-            sample_rows_in_table_info: int = 3,
-    ):
-        return SQLAgent(
-            self.get_command_executor(),
-            database,
-            include_tables,
-            ignore_tables,
-            sample_rows_in_table_info,
-            cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
-        )
-
     def _make_text_to_sql_tools(self, skills: List[db.Skills], llm) -> List:
         '''
         Uses SQLAgent to execute tool
@@ -112,19 +124,47 @@ class SkillToolController:
             raise ImportError(
                 'To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`')

+        command_executor = self.get_command_executor()
+
         tables_list = []
         for skill in skills:
             database = skill.params['database']
-
-
-
-
-
-
-
-
-
-
+            restriction_on_tables = skill.restriction_on_tables
+            if restriction_on_tables is None:
+                handler = command_executor.session.integration_controller.get_data_handler(database)
+                if 'all' in inspect.signature(handler.get_tables).parameters:
+                    response = handler.get_tables(all=True)
+                else:
+                    response = handler.get_tables()
+                # no restrictions
+                if 'table_schema' in response.data_frame.columns:
+                    for _, row in response.data_frame.iterrows():
+                        tables_list.append(f"{database}.{row['table_schema']}.{row['table_name']}")
+                else:
+                    for _, row in response.data_frame.iterrows():
+                        tables_list.append(f"{database}.{row['table_name']}")
+                continue
+            for schema_name, tables in restriction_on_tables.items():
+                for table in tables:
+                    if schema_name is None:
+                        tables_list.append(f'{database}.{table}')
+                    else:
+                        tables_list.append(f'{database}.{schema_name}.{table}')
+
+        sql_agent = SQLAgent(
+            command_executor=command_executor,
+            databases=list(set(s.params['database'] for s in skills)),
+            databases_struct={
+                skill.params['database']: skill.restriction_on_tables
+                for skill in skills
+            },
+            include_tables=tables_list,
+            ignore_tables=None,
+            sample_rows_in_table_info=3,
+            cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
+        )
+        db = MindsDBSQL.custom_init(
+            sql_agent=sql_agent
         )

         # Users probably don't need to configure this for now.
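The databases_struct argument assembled above maps each data source to its per-schema table restrictions. A hypothetical illustration of that shape (data source, schema and table names are invented; in the real code a None restriction is expanded by asking the handler for the schema's tables, while the sketch simply skips it), together with the flattened database.schema.table strings that end up in include_tables:

    # Hypothetical databases_struct: data source -> (schema or None for the default
    # schema) -> set of allowed tables, or None meaning "whole schema allowed".
    databases_struct = {
        'example_postgres': {
            'public': {'orders', 'customers'},
            'sales': None,                 # no table-level restriction in this schema
        },
        'example_mysql': {
            None: {'events'},              # default schema, one table
        },
    }

    # Flattened names in the same style _make_text_to_sql_tools builds for include_tables.
    tables_list = [
        f'{db_name}.{schema}.{table}' if schema is not None else f'{db_name}.{table}'
        for db_name, schemas in databases_struct.items()
        for schema, tables in schemas.items()
        for table in (tables or [])
    ]
    print(sorted(tables_list))
    # ['example_mysql.events', 'example_postgres.public.customers', 'example_postgres.public.orders']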
@@ -138,14 +178,18 @@ class SkillToolController:
         for i, tool in enumerate(sql_database_tools):
             if isinstance(tool, QuerySQLDataBaseTool):
                 # Add our own custom description so our agent knows when to query this table.
-                tool.description
-
-
-                    f'
-
-
-                    f'
-
+                original_description = tool.description
+                tool.description = ''
+                if len(descriptions) > 0:
+                    tool.description += f'Use this tool if you need data about {" OR ".join(descriptions)}.\n'
+                tool.description += 'Use the conversation context to decide which table to query.\n'
+                if len(tables_list) > 0:
+                    f'These are the available tables: {",".join(tables_list)}.\n'
+                tool.description += (
+                    'ALWAYS consider these special cases:\n'
+                    ' - For TIMESTAMP type columns, make sure you include the time portion in your query (e.g. WHERE date_column = "2020-01-01 12:00:00")\n'
+                    'Here are the rest of the instructions:\n'
+                    f'{original_description}'
                 )
                 sql_database_tools[i] = tool
         return sql_database_tools
mindsdb/interfaces/skills/sql_agent.py
CHANGED

@@ -1,37 +1,38 @@
-from typing import Iterable, List, Optional

 import re
-
+import inspect
+from typing import Iterable, List, Optional

 import pandas as pd
 from mindsdb_sql_parser import parse_sql
-from mindsdb_sql_parser.ast import Identifier
-from mindsdb.integrations.utilities.query_traversal import query_traversal
+from mindsdb_sql_parser.ast import Select, Show, Describe, Explain, Identifier

 from mindsdb.utilities import log
 from mindsdb.utilities.context import context as ctx
+from mindsdb.integrations.utilities.query_traversal import query_traversal

 logger = log.getLogger(__name__)


 class SQLAgent:
-
     def __init__(
             self,
             command_executor,
-
+            databases: List[str],
+            databases_struct: dict,
             include_tables: Optional[List[str]] = None,
             ignore_tables: Optional[List[str]] = None,
             sample_rows_in_table_info: int = 3,
             cache: Optional[dict] = None
     ):
         self._command_executor = command_executor
+        self._mindsdb_db_struct = databases_struct

         self._sample_rows_in_table_info = int(sample_rows_in_table_info)

         self._tables_to_include = include_tables
         self._tables_to_ignore = []
-        self._databases =
+        self._databases = databases
         if not self._tables_to_include:
             # ignore_tables and include_tables should not be used together.
             # include_tables takes priority if it's set.
@@ -40,7 +41,6 @@ class SQLAgent:

     def _call_engine(self, query: str, database=None):
         # switch database
-
         ast_query = parse_sql(query.strip('`'))
         self._check_permissions(ast_query)

@@ -55,7 +55,6 @@ class SQLAgent:
         return ret

     def _check_permissions(self, ast_query):
-
         # check type of query
         if not isinstance(ast_query, (Select, Show, Describe, Explain)):
             raise ValueError(f"Query is not allowed: {ast_query.to_string()}")
@@ -66,14 +65,21 @@ class SQLAgent:
             if is_table and isinstance(node, Identifier):
                 name1 = node.to_string()
                 name2 = '.'.join(node.parts)
-
+                if len(node.parts) == 3:
+                    name3 = '.'.join(node.parts[1:])
+                else:
+                    name3 = node.parts[-1]
                 if not {name1, name2, name3}.intersection(self._tables_to_include):
                     raise ValueError(f"Table {name1} not found. Available tables: {', '.join(self._tables_to_include)}")

         query_traversal(ast_query, _check_f)

     def get_usable_table_names(self) -> Iterable[str]:
+        """Get a list of tables that the agent has access to.

+        Returns:
+            Iterable[str]: list with table names
+        """
         cache_key = f'{ctx.company_id}_{",".join(self._databases)}_tables'

         # first check cache and return if found
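A rough illustration of the candidate names the permission check above derives for a three-part identifier; the parts list and allow-list are made up, and node.to_string() is approximated by joining the parts, ignoring any quoting the real AST node would apply.

    # Hypothetical identifier parts and allow-list.
    parts = ['my_db', 'public', 'orders']
    name1 = '.'.join(parts)                   # stand-in for node.to_string()
    name2 = '.'.join(parts)
    name3 = '.'.join(parts[1:]) if len(parts) == 3 else parts[-1]

    tables_to_include = ['my_db.public.orders', 'my_db.public.customers']
    print({name1, name2, name3})                                        # {'my_db.public.orders', 'public.orders'}
    print(bool({name1, name2, name3}.intersection(tables_to_include)))  # True -> query is allowed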
@@ -85,25 +91,52 @@ class SQLAgent:
         if self._tables_to_include:
             return self._tables_to_include

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        result_tables = []
+
+        for db_name in self._mindsdb_db_struct:
+            handler = self._command_executor.session.integration_controller.get_data_handler(db_name)
+
+            schemas_names = list(self._mindsdb_db_struct[db_name].keys())
+            if len(schemas_names) > 1 and None in schemas_names:
+                raise Exception('default schema and named schemas can not be used in same filter')
+
+            if None in schemas_names:
+                # get tables only from default schema
+                response = handler.get_tables()
+                tables_in_default_schema = list(response.data_frame.table_name)
+                schema_tables_restrictions = self._mindsdb_db_struct[db_name][None]  # None - is default schema
+                if schema_tables_restrictions is None:
+                    for table_name in tables_in_default_schema:
+                        result_tables.append([db_name, table_name])
+                else:
+                    for table_name in schema_tables_restrictions:
+                        if table_name in tables_in_default_schema:
+                            result_tables.append([db_name, table_name])
+            else:
+                if 'all' in inspect.signature(handler.get_tables).parameters:
+                    response = handler.get_tables(all=True)
+                else:
+                    response = handler.get_tables()
+                response_schema_names = list(response.data_frame.table_schema.unique())
+                schemas_intersection = set(schemas_names) & set(response_schema_names)
+                if len(schemas_intersection) == 0:
+                    raise Exception('There are no allowed schemas in ds')
+
+                for schema_name in schemas_intersection:
+                    schema_sub_df = response.data_frame[response.data_frame['table_schema'] == schema_name]
+                    if self._mindsdb_db_struct[db_name][schema_name] is None:
+                        # all tables from schema allowed
+                        for row in schema_sub_df:
+                            result_tables.append([db_name, schema_name, row['table_name']])
+                    else:
+                        for table_name in self._mindsdb_db_struct[db_name][schema_name]:
+                            if table_name in schema_sub_df['table_name'].values:
+                                result_tables.append([db_name, schema_name, table_name])
+
+        result_tables = ['.'.join(x) for x in result_tables]
         if self._cache:
-            self._cache.set(cache_key, set(
-
-        return usable_tables
+            self._cache.set(cache_key, set(result_tables))
+        return result_tables

     def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
         """
@@ -115,7 +148,10 @@ class SQLAgent:
         tables_idx = {}
         for table in all_tables:
             # by name
-
+            if len(table.parts) == 3:
+                tables_idx[tuple(table.parts[1:])] = table
+            else:
+                tables_idx[(table.parts[-1],)] = table
             # by path
             tables_idx[tuple(table.parts)] = table

@@ -165,26 +201,31 @@ class SQLAgent:
     def _get_single_table_info(self, table: Identifier) -> str:
         if len(table.parts) < 2:
             raise ValueError(f"Database is required for table: {table}")
-
+        if len(table.parts) == 3:
+            integration, schema_name, table_name = table.parts[-3:]
+        else:
+            schema_name = None
+            integration, table_name = table.parts[-2:]
+
         table_str = str(table)

         dn = self._command_executor.session.datahub.get(integration)

         fields, dtypes = [], []
-        for column in dn.get_table_columns(table_name):
+        for column in dn.get_table_columns(table_name, schema_name):
             fields.append(column['name'])
             dtypes.append(column.get('type', ''))

-        info = f'Table named `{
-        info += f"\
+        info = f'Table named `{table_str}`:\n'
+        info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str}:\n"
         info += "\t".join([field for field in fields])
-        info += self._get_sample_rows(table_str, fields) + "\n
+        info += self._get_sample_rows(table_str, fields) + "\n"
         info += '\nColumn data types: ' + ",\t".join(
-            [f'`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
+            [f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
         return info

     def _get_sample_rows(self, table: str, fields: List[str]) -> str:
-        command = f"select {','.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
+        command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
         try:
             ret = self._call_engine(command)
             sample_rows = ret.data.to_lists()
mindsdb/interfaces/storage/db.py
CHANGED
@@ -212,7 +212,7 @@ class Project(Base):
     )
     deleted_at = Column(DateTime)
     name = Column(String, nullable=False)
-    company_id = Column(Integer)
+    company_id = Column(Integer, default=0)
     __table_args__ = (
         UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
     )
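The default of 0 matters together with the migration below because NULL values defeat the (name, company_id) unique constraint: SQL treats every NULL as distinct, so duplicate project names slip through while company_id is NULL. A small self-contained sketch of that behaviour against an in-memory SQLite database (the row values are invented):

    import sqlalchemy as sa

    engine = sa.create_engine('sqlite://')
    meta = sa.MetaData()
    project = sa.Table(
        'project', meta,
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('name', sa.String, nullable=False),
        sa.Column('company_id', sa.Integer),
        sa.UniqueConstraint('name', 'company_id', name='unique_project_name_company_id'),
    )
    meta.create_all(engine)

    with engine.begin() as conn:
        # Two projects with the same name both insert successfully while company_id is NULL.
        conn.execute(project.insert(), [
            {'name': 'mindsdb', 'company_id': None},
            {'name': 'mindsdb', 'company_id': None},
        ])
        print(conn.execute(sa.select(sa.func.count()).select_from(project)).scalar())  # 2
        # With company_id = 0 the second insert would violate the unique constraint.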
mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py
ADDED

@@ -0,0 +1,88 @@
+"""project-company
+
+Revision ID: c06c35f7e8e1
+Revises: f6dc924079fa
+Create Date: 2025-01-15 14:14:29.295834
+
+"""
+from collections import defaultdict
+
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db # noqa
+from mindsdb.utilities import log
+
+# revision identifiers, used by Alembic.
+revision = 'c06c35f7e8e1'
+down_revision = 'f6dc924079fa'
+branch_labels = None
+depends_on = None
+
+
+logger = log.getLogger(__name__)
+
+
+def upgrade():
+
+    """
+    convert company_id from null to 0 to make constrain works
+    duplicated names are renamed
+    """
+
+    conn = op.get_bind()
+    table = sa.Table(
+        'project',
+        sa.MetaData(),
+        sa.Column('id', sa.Integer()),
+        sa.Column('name', sa.String()),
+        sa.Column('company_id', sa.Integer()),
+    )
+
+    data = conn.execute(
+        table
+        .select()
+        .where(table.c.company_id == sa.null())
+    ).fetchall()
+
+    names = defaultdict(list)
+    for id, name, _ in data:
+        names[name].append(id)
+
+    # get duplicated
+    for name, ids in names.items():
+        if len(ids) == 1:
+            continue
+
+        # rename all except first
+        for id in ids[1:]:
+            new_name = f'{name}__{id}'
+
+            op.execute(
+                table
+                .update()
+                .where(table.c.id == id)
+                .values({'name': new_name})
+            )
+            logger.warning(f'Found duplicated project name: {name}, renamed to: {new_name}')
+
+    op.execute(
+        table
+        .update()
+        .where(table.c.company_id == sa.null())
+        .values({'company_id': 0})
+    )
+
+
+def downgrade():
+    table = sa.Table(
+        'project',
+        sa.MetaData(),
+        sa.Column('company_id', sa.Integer())
+    )
+
+    op.execute(
+        table
+        .update()
+        .where(table.c.company_id == 0)
+        .values({'company_id': sa.null()})
+    )