asktable-advisor 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ """Database data generator using LLM."""
2
+
3
+ import logging
4
+ from typing import Dict, Any, Optional
5
+
6
+ from ..agent.llm_client import LLMClient
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class DataGenerator:
    """
    Generate realistic sample data using LLM.

    Creates INSERT statements with business-logic-aware data.
    """

    def __init__(self, llm_client: "LLMClient"):
        """
        Initialize data generator.

        Args:
            llm_client: LLM client for AI-powered generation
        """
        self.llm_client = llm_client

    def generate_data(
        self,
        schema_sql: str,
        scenario_context: str,
        data_volume: Dict[str, int],
    ) -> str:
        """
        Generate sample data for database tables.

        Args:
            schema_sql: Database schema (CREATE TABLE statements)
            scenario_context: Business context for generating realistic data
            data_volume: Dict mapping table names to desired row counts

        Returns:
            SQL INSERT statements

        Raises:
            Exception: Whatever the LLM client raises; the error is logged
                and re-raised unchanged.
        """
        logger.info("Generating sample data for %d tables", len(data_volume))

        prompt = self._build_data_prompt(
            schema_sql,
            scenario_context,
            data_volume,
        )

        try:
            response = self.llm_client.create_message(
                messages=[{"role": "user", "content": prompt}],
                system=self._get_data_system_prompt(),
            )
            raw_reply = self.llm_client.extract_text(response)
            # The reply may be wrapped in markdown fences; keep only the SQL.
            sql = self._extract_sql_from_markdown(raw_reply)
        except Exception as e:
            logger.error("Failed to generate data: %s", e)
            raise

        logger.info("Sample data generated successfully")
        return sql

    def _build_data_prompt(
        self,
        schema_sql: str,
        scenario_context: str,
        data_volume: Dict[str, int],
    ) -> str:
        """
        Build the user prompt for data generation.

        The prompt consists of the scenario context, the schema inside a
        fenced SQL block, one bullet per table with its requested row count,
        and a fixed list of generation rules.
        """
        header = f"""请为以下数据库表结构生成真实感的示例数据:

场景上下文:
{scenario_context}

表结构:
```sql
{schema_sql}
```

数据量要求:
"""

        volume_lines = "".join(
            f"- {table} 表:生成 {count} 条数据\n"
            for table, count in data_volume.items()
        )

        footer = """
生成要求:
1. 数据要符合业务逻辑和真实场景
2. 确保外键关系正确(引用的 ID 必须存在)
3. 姓名、地址、商品名等使用真实感的中文内容
4. 数值数据要有合理的分布(不要全部相同)
5. 时间数据要分布在合理的时间范围内
6. 使用批量 INSERT 语句(每条语句插入多行)

只返回 INSERT INTO 语句,不要包含其他解释。
注意:先插入父表数据,再插入子表数据(按外键依赖顺序)。
"""

        return header + volume_lines + footer

    def _get_data_system_prompt(self) -> str:
        """Get system prompt for data generation."""
        return """你是一位数据生成专家,擅长创建真实感、符合业务逻辑的示例数据。

你的特点:
- 生成的数据具有真实性和多样性
- 严格遵守外键约束和业务规则
- 理解各种业务场景的数据特征
- 确保数据分布合理(符合真实业务规律)

生成策略:
1. 姓名:使用常见的中文姓名,多样化
2. 日期:分布在合理的时间范围,考虑业务周期
3. 金额:符合业务场景(如订单金额有大有小)
4. 状态:不同状态合理分布(如订单状态:待付款、已付款、已发货等)
5. 关联数据:确保外键 ID 正确关联(如订单的 user_id 必须是真实存在的用户)

请根据表结构和业务场景,生成高质量的示例数据。
"""

    def _extract_sql_from_markdown(self, text: str) -> str:
        """
        Extract SQL from markdown code blocks.

        Handles the shapes an LLM reply typically takes:

        * no fence at all: the stripped text is returned unchanged;
        * one or more fenced blocks: their contents are joined with a blank
          line. Blocks labelled ``sql`` / ``mysql`` / ``mariadb`` (any case)
          win over unlabelled blocks, and the label is never part of the
          result;
        * an opening fence with no closing fence (truncated reply):
          everything after the opening fence is used.

        Args:
            text: Raw text returned by the LLM

        Returns:
            The SQL with surrounding markdown removed
        """
        # Local import so the method does not depend on module-level imports.
        import re

        labelled = []
        unlabelled = []
        # A fence runs to the next ``` or, if the reply was cut off, to the end.
        for fence in re.finditer(r"```(.*?)(?:```|\Z)", text, re.DOTALL):
            chunk = fence.group(1)
            label = re.match(
                r"[ \t]*(?:sql|mysql|mariadb)(?=\s|\Z)", chunk, re.IGNORECASE
            )
            if label:
                body = chunk[label.end():].strip()
                target = labelled
            else:
                body = chunk.strip()
                target = unlabelled
            if body:
                target.append(body)

        blocks = labelled or unlabelled
        if not blocks:
            return text.strip()
        return "\n\n".join(blocks)
@@ -0,0 +1,228 @@
1
+ """Database manager for AskTable Advisor."""
2
+
3
+ import logging
4
+ from typing import Optional, List
5
+
6
+ from sqlalchemy import create_engine, text
7
+ from sqlalchemy.engine import Engine
8
+ from sqlalchemy.orm import sessionmaker, Session
9
+
10
+ from ..config import AdvisorSettings
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class DatabaseManager:
16
+ """
17
+ Database connection and operation manager.
18
+
19
+ Handles MySQL database connections, SQL execution, and database creation.
20
+ """
21
+
22
+ def __init__(self, settings: AdvisorSettings):
23
+ """
24
+ Initialize database manager.
25
+
26
+ Args:
27
+ settings: Application settings with database credentials
28
+ """
29
+ self.settings = settings
30
+ self._engine: Optional[Engine] = None
31
+ self._session_maker: Optional[sessionmaker] = None
32
+
33
+ logger.info(
34
+ f"Database manager initialized: "
35
+ f"{settings.mysql_host}:{settings.mysql_port}/{settings.mysql_database}"
36
+ )
37
+
38
+ @property
39
+ def engine(self) -> Engine:
40
+ """Get or create database engine."""
41
+ if self._engine is None:
42
+ self._engine = create_engine(
43
+ self.settings.mysql_url,
44
+ pool_pre_ping=True,
45
+ pool_size=self.settings.mysql_pool_size,
46
+ max_overflow=self.settings.mysql_max_overflow,
47
+ echo=False,
48
+ )
49
+ return self._engine
50
+
51
+ @property
52
+ def session_maker(self) -> sessionmaker:
53
+ """Get or create session maker."""
54
+ if self._session_maker is None:
55
+ self._session_maker = sessionmaker(
56
+ autocommit=False,
57
+ autoflush=False,
58
+ bind=self.engine,
59
+ )
60
+ return self._session_maker
61
+
62
+ def get_session(self) -> Session:
63
+ """Create a new database session."""
64
+ return self.session_maker()
65
+
66
+ def test_connection(self) -> bool:
67
+ """
68
+ Test database connection.
69
+
70
+ Returns:
71
+ True if connection successful, False otherwise
72
+ """
73
+ try:
74
+ with self.engine.connect() as conn:
75
+ result = conn.execute(text("SELECT 1"))
76
+ result.fetchone()
77
+ logger.info("Database connection successful")
78
+ return True
79
+ except Exception as e:
80
+ logger.error(f"Database connection failed: {e}")
81
+ return False
82
+
83
+ def create_database(self, database_name: Optional[str] = None) -> None:
84
+ """
85
+ Create database if it doesn't exist.
86
+
87
+ Args:
88
+ database_name: Database name (default: use settings)
89
+ """
90
+ db_name = database_name or self.settings.mysql_database
91
+
92
+ # Create engine without database selection
93
+ engine_without_db = create_engine(
94
+ f"mysql+pymysql://{self.settings.mysql_user}:{self.settings.mysql_password}"
95
+ f"@{self.settings.mysql_host}:{self.settings.mysql_port}",
96
+ pool_pre_ping=True,
97
+ )
98
+
99
+ try:
100
+ with engine_without_db.connect() as conn:
101
+ # Use CREATE DATABASE with proper MySQL syntax
102
+ conn.execute(
103
+ text(f"CREATE DATABASE IF NOT EXISTS `{db_name}` "
104
+ f"CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci")
105
+ )
106
+ conn.commit()
107
+ logger.info(f"Database '{db_name}' ready")
108
+ except Exception as e:
109
+ logger.error(f"Failed to create database: {e}")
110
+ raise
111
+ finally:
112
+ engine_without_db.dispose()
113
+
114
+ def execute_sql(self, sql: str, description: Optional[str] = None) -> int:
115
+ """
116
+ Execute SQL statements.
117
+
118
+ Args:
119
+ sql: SQL content (may contain multiple statements separated by ;)
120
+ description: Optional description of the SQL operation
121
+
122
+ Returns:
123
+ Number of statements executed
124
+ """
125
+ if description:
126
+ logger.info(f"Executing SQL: {description}")
127
+
128
+ with self.get_session() as session:
129
+ try:
130
+ # Split by semicolon and filter empty statements
131
+ statements = [
132
+ s.strip()
133
+ for s in sql.split(";")
134
+ if s.strip() and not s.strip().startswith("--")
135
+ ]
136
+
137
+ for statement in statements:
138
+ session.execute(text(statement))
139
+
140
+ session.commit()
141
+ logger.info(f"Successfully executed {len(statements)} SQL statements")
142
+ return len(statements)
143
+
144
+ except Exception as e:
145
+ session.rollback()
146
+ logger.error(f"SQL execution failed: {e}")
147
+ logger.error(f"Failed SQL:\n{sql[:500]}...") # Log first 500 chars
148
+ raise
149
+
150
+ def execute_sql_statements(self, statements: List[str]) -> int:
151
+ """
152
+ Execute a list of SQL statements.
153
+
154
+ Args:
155
+ statements: List of SQL statements
156
+
157
+ Returns:
158
+ Number of statements executed
159
+ """
160
+ with self.get_session() as session:
161
+ try:
162
+ for statement in statements:
163
+ if statement.strip():
164
+ session.execute(text(statement))
165
+
166
+ session.commit()
167
+ logger.info(f"Successfully executed {len(statements)} SQL statements")
168
+ return len(statements)
169
+
170
+ except Exception as e:
171
+ session.rollback()
172
+ logger.error(f"SQL execution failed: {e}")
173
+ raise
174
+
175
+ def database_exists(self, database_name: Optional[str] = None) -> bool:
176
+ """
177
+ Check if database exists.
178
+
179
+ Args:
180
+ database_name: Database name (default: use settings)
181
+
182
+ Returns:
183
+ True if database exists
184
+ """
185
+ db_name = database_name or self.settings.mysql_database
186
+
187
+ engine_without_db = create_engine(
188
+ f"mysql+pymysql://{self.settings.mysql_user}:{self.settings.mysql_password}"
189
+ f"@{self.settings.mysql_host}:{self.settings.mysql_port}",
190
+ pool_pre_ping=True,
191
+ )
192
+
193
+ try:
194
+ with engine_without_db.connect() as conn:
195
+ result = conn.execute(
196
+ text("SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA "
197
+ "WHERE SCHEMA_NAME = :db_name"),
198
+ {"db_name": db_name}
199
+ )
200
+ exists = result.fetchone() is not None
201
+ return exists
202
+ finally:
203
+ engine_without_db.dispose()
204
+
205
+ def list_tables(self, database_name: Optional[str] = None) -> List[str]:
206
+ """
207
+ List all tables in the database.
208
+
209
+ Args:
210
+ database_name: Database name (default: use settings)
211
+
212
+ Returns:
213
+ List of table names
214
+ """
215
+ db_name = database_name or self.settings.mysql_database
216
+
217
+ with self.engine.connect() as conn:
218
+ result = conn.execute(
219
+ text(f"SHOW TABLES FROM `{db_name}`")
220
+ )
221
+ tables = [row[0] for row in result.fetchall()]
222
+ return tables
223
+
224
+ def close(self) -> None:
225
+ """Close database connections."""
226
+ if self._engine:
227
+ self._engine.dispose()
228
+ logger.info("Database connections closed")
@@ -0,0 +1,148 @@
1
+ """Database schema generator using LLM."""
2
+
3
+ import logging
4
+ from typing import Dict, List, Any, Optional
5
+
6
+ from ..agent.llm_client import LLMClient
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class SchemaGenerator:
    """
    Generate database schema using LLM.

    Analyzes business scenarios and creates appropriate table structures.
    """

    def __init__(self, llm_client: "LLMClient"):
        """
        Initialize schema generator.

        Args:
            llm_client: LLM client for AI-powered generation
        """
        self.llm_client = llm_client

    def generate_schema(
        self,
        scenario_name: str,
        scenario_description: str,
        requirements: Optional[List[str]] = None,
        scale_info: Optional[Dict[str, Any]] = None,
    ) -> str:
        """
        Generate database schema SQL for a scenario.

        Args:
            scenario_name: Name of the scenario
            scenario_description: Detailed scenario description
            requirements: List of specific requirements
            scale_info: Information about data scale

        Returns:
            SQL DDL statements (CREATE TABLE)

        Raises:
            Exception: Whatever the LLM client raises; the error is logged
                and re-raised unchanged.
        """
        logger.info("Generating schema for scenario: %s", scenario_name)

        prompt = self._build_schema_prompt(
            scenario_name,
            scenario_description,
            requirements,
            scale_info,
        )

        try:
            response = self.llm_client.create_message(
                messages=[{"role": "user", "content": prompt}],
                system=self._get_schema_system_prompt(),
            )
            raw_reply = self.llm_client.extract_text(response)
            # The reply may be wrapped in markdown fences; keep only the SQL.
            sql = self._extract_sql_from_markdown(raw_reply)
        except Exception as e:
            logger.error("Failed to generate schema: %s", e)
            raise

        logger.info("Schema generated successfully")
        return sql

    def _build_schema_prompt(
        self,
        scenario_name: str,
        scenario_description: str,
        requirements: Optional[List[str]],
        scale_info: Optional[Dict[str, Any]],
    ) -> str:
        """
        Build the user prompt for schema generation.

        The requirements and scale sections are emitted only when the
        corresponding argument is non-empty.
        """
        sections = [
            f"""请为以下业务场景设计 MySQL 数据库表结构:

场景名称:{scenario_name}
场景描述:{scenario_description}
"""
        ]

        if requirements:
            sections.append("\n具体需求:\n")
            sections.extend(f"- {req}\n" for req in requirements)

        if scale_info:
            sections.append("\n数据规模:\n")
            sections.extend(
                f"- {key}: {value}\n" for key, value in scale_info.items()
            )

        sections.append("""
请生成完整的 CREATE TABLE 语句。要求:
1. 每个表都要有主键(id INT AUTO_INCREMENT PRIMARY KEY)
2. 正确设置外键关系(使用 FOREIGN KEY)
3. 包含 created_at 和 updated_at 时间戳字段
4. 选择合适的数据类型(INT、VARCHAR、DECIMAL、DATETIME 等)
5. 添加必要的索引
6. 使用 InnoDB 引擎
7. 字符集使用 utf8mb4

只返回 SQL 语句,不要包含其他解释文字。
""")

        return "".join(sections)

    def _get_schema_system_prompt(self) -> str:
        """Get system prompt for schema generation."""
        return """你是一位经验丰富的数据库设计专家。

你精通:
- 各种业务场景的数据建模(电商、CRM、教育、医疗等)
- MySQL 数据库设计最佳实践
- 数据库规范化原则(1NF、2NF、3NF)
- 性能优化(索引设计、查询优化)

设计原则:
1. 表结构清晰、字段命名规范
2. 正确建立表关系(一对一、一对多、多对多)
3. 适当冗余以提升查询性能
4. 考虑数据一致性和完整性
5. 为常用查询添加索引

请根据业务场景,设计合理、规范的数据库表结构。
"""

    def _extract_sql_from_markdown(self, text: str) -> str:
        """
        Extract SQL from markdown code blocks.

        Handles the shapes an LLM reply typically takes:

        * no fence at all: the stripped text is returned unchanged;
        * one or more fenced blocks (e.g. one per table): their contents
          are joined with a blank line. Blocks labelled ``sql`` / ``mysql``
          / ``mariadb`` (any case) win over unlabelled blocks, and the label
          is never part of the result;
        * an opening fence with no closing fence (truncated reply):
          everything after the opening fence is used.

        Args:
            text: Raw text returned by the LLM

        Returns:
            The SQL with surrounding markdown removed
        """
        # Local import so the method does not depend on module-level imports.
        import re

        labelled = []
        unlabelled = []
        # A fence runs to the next ``` or, if the reply was cut off, to the end.
        for fence in re.finditer(r"```(.*?)(?:```|\Z)", text, re.DOTALL):
            chunk = fence.group(1)
            label = re.match(
                r"[ \t]*(?:sql|mysql|mariadb)(?=\s|\Z)", chunk, re.IGNORECASE
            )
            if label:
                body = chunk[label.end():].strip()
                target = labelled
            else:
                body = chunk.strip()
                target = unlabelled
            if body:
                target.append(body)

        blocks = labelled or unlabelled
        if not blocks:
            return text.strip()
        return "\n\n".join(blocks)
File without changes
File without changes